win32mbcs: avoid unintentional failure at colorization...
FUJIWARA Katsunori
r32566:377c74ef stable
@@ -1,195 +1,195 @@
1 1 # win32mbcs.py -- MBCS filename support for Mercurial
2 2 #
3 3 # Copyright (c) 2008 Shun-ichi Goto <shunichi.goto@gmail.com>
4 4 #
5 5 # Version: 0.3
6 6 # Author: Shun-ichi Goto <shunichi.goto@gmail.com>
7 7 #
8 8 # This software may be used and distributed according to the terms of the
9 9 # GNU General Public License version 2 or any later version.
10 10 #
11 11
12 12 '''allow the use of MBCS paths with problematic encodings
13 13
14 14 Some MBCS encodings do not work well for some path operations (e.g.
15 15 splitting a path, case conversion, etc.) on their encoded bytes. We call
16 16 such an encoding (e.g. shift_jis and big5) a "problematic encoding".
17 17 This extension can be used to fix the issue with those encodings by
18 18 wrapping some functions to convert to a Unicode string before the path
19 19 operation.
20 20
21 21 This extension is useful for:
22 22
23 23 - Japanese Windows users using shift_jis encoding.
24 24 - Chinese Windows users using big5 encoding.
25 25 - All users who use a repository with one of the problematic encodings
26 26 on a case-insensitive file system.
27 27
28 28 This extension is not needed for:
29 29
30 30 - Any user who uses only ASCII characters in paths.
31 31 - Any user who does not use any of the problematic encodings.
32 32
33 33 Note that there are some limitations on using this extension:
34 34
35 35 - You should use a single encoding in one repository.
36 36 - If the repository path ends with 0x5c, .hg/hgrc cannot be read.
37 37 - win32mbcs is not compatible with the fixutf8 extension.
38 38
39 39 By default, win32mbcs uses encoding.encoding as determined by Mercurial.
40 40 You can specify another encoding via a config option::
41 41
42 42 [win32mbcs]
43 43 encoding = sjis
44 44
45 45 This is useful for users who want to commit with UTF-8 log messages.
46 46 '''
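For context, a minimal sketch of why byte-wise path operations misfire under a problematic encoding (assuming a Python 2 interpreter; the character below is just one well-known example): the second byte of some shift_jis characters is 0x5c, which Windows also uses as the path separator.

    # -*- coding: utf-8 -*-
    import ntpath

    name = u'\u8868'.encode('shift_jis')    # '\x95\x5c': 2nd byte is '\\'
    path = 'dir\\' + name + '.txt'
    # a byte-wise split mistakes the 0x5c inside the character for a
    # separator and cuts the filename in half:
    print(ntpath.split(path))               # ('dir\\\x95', '.txt')
    # decoding to unicode first keeps the character intact:
    print(ntpath.split(path.decode('shift_jis')))  # (u'dir', u'\u8868.txt')

This is exactly the failure mode the wrappers below avoid by round-tripping arguments through unicode.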
47 47 from __future__ import absolute_import
48 48
49 49 import os
50 50 import sys
51 51
52 52 from mercurial.i18n import _
53 53 from mercurial import (
54 54 encoding,
55 55 error,
56 56 pycompat,
57 57 )
58 58
59 59 # Note for extension authors: ONLY specify testedwith = 'ships-with-hg-core' for
60 60 # extensions which SHIP WITH MERCURIAL. Non-mainline extensions should
61 61 # be specifying the version(s) of Mercurial they are tested with, or
62 62 # leave the attribute unspecified.
63 63 testedwith = 'ships-with-hg-core'
64 64
65 65 _encoding = None # see extsetup
66 66
67 67 def decode(arg):
68 68 if isinstance(arg, str):
69 69 uarg = arg.decode(_encoding)
70 70 if arg == uarg.encode(_encoding):
71 71 return uarg
72 72 raise UnicodeError("Not local encoding")
73 73 elif isinstance(arg, tuple):
74 74 return tuple(map(decode, arg))
75 75 elif isinstance(arg, list):
76 76 return map(decode, arg)
77 77 elif isinstance(arg, dict):
78 78 for k, v in arg.items():
79 79 arg[k] = decode(v)
80 80 return arg
81 81
82 82 def encode(arg):
83 83 if isinstance(arg, unicode):
84 84 return arg.encode(_encoding)
85 85 elif isinstance(arg, tuple):
86 86 return tuple(map(encode, arg))
87 87 elif isinstance(arg, list):
88 88 return map(encode, arg)
89 89 elif isinstance(arg, dict):
90 90 for k, v in arg.items():
91 91 arg[k] = encode(v)
92 92 return arg
93 93
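A quick sketch of the recursive converters above (illustration only: _encoding is set by hand here, whereas extsetup() normally does it):

    from hgext import win32mbcs

    win32mbcs._encoding = 'shift_jis'
    args = ('dir', [u'\u8868'.encode('shift_jis')])
    uargs = win32mbcs.decode(args)           # str -> unicode, recursively
    assert uargs == (u'dir', [u'\u8868'])
    assert win32mbcs.encode(uargs) == args   # encode() is the inverse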
94 94 def appendsep(s):
95 95 # ensure the path ends with os.sep, appending it if necessary.
96 96 try:
97 97 us = decode(s)
98 98 except UnicodeError:
99 99 us = s
100 100 if us and us[-1] not in ':/\\':
101 101 s += pycompat.ossep
102 102 return s
103 103
104 104
105 105 def basewrapper(func, argtype, enc, dec, args, kwds):
106 106 # if any argument is already converted, call the original directly
107 107 for arg in args:
108 108 if isinstance(arg, argtype):
109 109 return func(*args, **kwds)
110 110
111 111 try:
112 112 # convert string arguments, call func, then convert back the
113 113 # return value.
114 114 return enc(func(*dec(args), **dec(kwds)))
115 115 except UnicodeError:
116 116 raise error.Abort(_("[win32mbcs] filename conversion failed with"
117 117 " %s encoding\n") % (_encoding))
118 118
119 119 def wrapper(func, args, kwds):
120 120 return basewrapper(func, unicode, encode, decode, args, kwds)
121 121
122 122
123 123 def reversewrapper(func, args, kwds):
124 124 return basewrapper(func, str, decode, encode, args, kwds)
125 125
126 126 def wrapperforlistdir(func, args, kwds):
127 127 # Ensure the 'path' argument ends with os.sep, to avoid
128 128 # misinterpreting the last 0x5c of an MBCS 2nd byte as a path separator.
129 129 if args:
130 130 args = list(args)
131 131 args[0] = appendsep(args[0])
132 132 if 'path' in kwds:
133 133 kwds['path'] = appendsep(kwds['path'])
134 134 return func(*args, **kwds)
135 135
136 136 def wrapname(name, wrapper):
137 137 module, name = name.rsplit('.', 1)
138 138 module = sys.modules[module]
139 139 func = getattr(module, name)
140 140 def f(*args, **kwds):
141 141 return wrapper(func, args, kwds)
142 142 f.__name__ = func.__name__
143 143 setattr(module, name, f)
144 144
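wrapname() patches a function in an already-imported module in place; any callable with the (func, args, kwds) signature can serve as the wrapper. A standalone sketch reusing wrapname() with a hypothetical tracing wrapper:

    import os.path

    def tracing(func, args, kwds):
        # illustrative wrapper: log the call, then delegate
        print('calling %s%r' % (func.__name__, args))
        return func(*args, **kwds)

    wrapname('os.path.join', tracing)  # os.path.join is now wrapped
    os.path.join('a', 'b')             # prints "calling join('a', 'b')"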
145 145 # List of functions to be wrapped.
146 146 # NOTE: os.path.dirname() and os.path.basename() are safe because
147 147 # they use result of os.path.split()
148 148 funcs = '''os.path.join os.path.split os.path.splitext
149 149 os.path.normpath os.makedirs mercurial.util.endswithsep
150 150 mercurial.util.splitpath mercurial.util.fscasesensitive
151 151 mercurial.util.fspath mercurial.util.pconvert mercurial.util.normpath
152 152 mercurial.util.checkwinfilename mercurial.util.checkosfilename
153 153 mercurial.util.split'''
154 154
155 155 # These functions must be called with locally encoded strings,
156 156 # because they expect their arguments in the local encoding and
157 157 # misbehave when given unicode strings.
158 158 rfuncs = '''mercurial.encoding.upper mercurial.encoding.lower
159 mercurial.pycompat.bytestr'''
159 mercurial.util._filenamebytestr'''
160 160
161 161 # List of Windows specific functions to be wrapped.
162 162 winfuncs = '''os.path.splitunc'''
163 163
164 164 # codec and alias names of sjis and big5 to be faked.
165 165 problematic_encodings = '''big5 big5-tw csbig5 big5hkscs big5-hkscs
166 166 hkscs cp932 932 ms932 mskanji ms-kanji shift_jis csshiftjis shiftjis
167 167 sjis s_jis shift_jis_2004 shiftjis2004 sjis_2004 sjis2004
168 168 shift_jisx0213 shiftjisx0213 sjisx0213 s_jisx0213 950 cp950 ms950 '''
169 169
170 170 def extsetup(ui):
171 171 # TODO: decide use of config section for this extension
172 172 if ((not os.path.supports_unicode_filenames) and
173 173 (pycompat.sysplatform != 'cygwin')):
174 174 ui.warn(_("[win32mbcs] cannot activate on this platform.\n"))
175 175 return
176 176 # determine encoding for filename
177 177 global _encoding
178 178 _encoding = ui.config('win32mbcs', 'encoding', encoding.encoding)
179 179 # faking is only applied for the relevant environments.
180 180 if _encoding.lower() in problematic_encodings.split():
181 181 for f in funcs.split():
182 182 wrapname(f, wrapper)
183 183 if pycompat.osname == 'nt':
184 184 for f in winfuncs.split():
185 185 wrapname(f, wrapper)
186 186 wrapname("mercurial.osutil.listdir", wrapperforlistdir)
187 187 # wrap functions to be called with local byte string arguments
188 188 for f in rfuncs.split():
189 189 wrapname(f, reversewrapper)
190 190 # Check sys.argv manually instead of using ui.debug() because
191 191 # command line options are not yet applied when
192 192 # extensions.loadall() is called.
193 193 if '--debug' in sys.argv:
194 194 ui.write(("[win32mbcs] activated with encoding: %s\n")
195 195 % _encoding)
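For reference, the extension is enabled through the usual hgrc mechanism; the encoding option is only needed when it should differ from encoding.encoding:

    [extensions]
    win32mbcs =

    [win32mbcs]
    encoding = sjis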
@@ -1,3746 +1,3749 @@
1 1 # util.py - Mercurial utility functions and platform specific implementations
2 2 #
3 3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 6 #
7 7 # This software may be used and distributed according to the terms of the
8 8 # GNU General Public License version 2 or any later version.
9 9
10 10 """Mercurial utility functions and platform specific implementations.
11 11
12 12 This contains helper routines that are independent of the SCM core and
13 13 hide platform-specific details from the core.
14 14 """
15 15
16 16 from __future__ import absolute_import
17 17
18 18 import bz2
19 19 import calendar
20 20 import codecs
21 21 import collections
22 22 import datetime
23 23 import errno
24 24 import gc
25 25 import hashlib
26 26 import imp
27 27 import os
28 28 import platform as pyplatform
29 29 import re as remod
30 30 import shutil
31 31 import signal
32 32 import socket
33 33 import stat
34 34 import string
35 35 import subprocess
36 36 import sys
37 37 import tempfile
38 38 import textwrap
39 39 import time
40 40 import traceback
41 41 import warnings
42 42 import zlib
43 43
44 44 from . import (
45 45 encoding,
46 46 error,
47 47 i18n,
48 48 osutil,
49 49 parsers,
50 50 pycompat,
51 51 )
52 52
53 53 cookielib = pycompat.cookielib
54 54 empty = pycompat.empty
55 55 httplib = pycompat.httplib
56 56 httpserver = pycompat.httpserver
57 57 pickle = pycompat.pickle
58 58 queue = pycompat.queue
59 59 socketserver = pycompat.socketserver
60 60 stderr = pycompat.stderr
61 61 stdin = pycompat.stdin
62 62 stdout = pycompat.stdout
63 63 stringio = pycompat.stringio
64 64 urlerr = pycompat.urlerr
65 65 urlreq = pycompat.urlreq
66 66 xmlrpclib = pycompat.xmlrpclib
67 67
68 # workaround for win32mbcs
69 _filenamebytestr = pycompat.bytestr
70
68 71 def isatty(fp):
69 72 try:
70 73 return fp.isatty()
71 74 except AttributeError:
72 75 return False
73 76
74 77 # glibc determines buffering on first write to stdout - if we replace a TTY
75 78 # destined stdout with a pipe destined stdout (e.g. pager), we want line
76 79 # buffering
77 80 if isatty(stdout):
78 81 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
79 82
80 83 if pycompat.osname == 'nt':
81 84 from . import windows as platform
82 85 stdout = platform.winstdout(stdout)
83 86 else:
84 87 from . import posix as platform
85 88
86 89 _ = i18n._
87 90
88 91 bindunixsocket = platform.bindunixsocket
89 92 cachestat = platform.cachestat
90 93 checkexec = platform.checkexec
91 94 checklink = platform.checklink
92 95 copymode = platform.copymode
93 96 executablepath = platform.executablepath
94 97 expandglobs = platform.expandglobs
95 98 explainexit = platform.explainexit
96 99 findexe = platform.findexe
97 100 gethgcmd = platform.gethgcmd
98 101 getuser = platform.getuser
99 102 getpid = os.getpid
100 103 groupmembers = platform.groupmembers
101 104 groupname = platform.groupname
102 105 hidewindow = platform.hidewindow
103 106 isexec = platform.isexec
104 107 isowner = platform.isowner
105 108 localpath = platform.localpath
106 109 lookupreg = platform.lookupreg
107 110 makedir = platform.makedir
108 111 nlinks = platform.nlinks
109 112 normpath = platform.normpath
110 113 normcase = platform.normcase
111 114 normcasespec = platform.normcasespec
112 115 normcasefallback = platform.normcasefallback
113 116 openhardlinks = platform.openhardlinks
114 117 oslink = platform.oslink
115 118 parsepatchoutput = platform.parsepatchoutput
116 119 pconvert = platform.pconvert
117 120 poll = platform.poll
118 121 popen = platform.popen
119 122 posixfile = platform.posixfile
120 123 quotecommand = platform.quotecommand
121 124 readpipe = platform.readpipe
122 125 rename = platform.rename
123 126 removedirs = platform.removedirs
124 127 samedevice = platform.samedevice
125 128 samefile = platform.samefile
126 129 samestat = platform.samestat
127 130 setbinary = platform.setbinary
128 131 setflags = platform.setflags
129 132 setsignalhandler = platform.setsignalhandler
130 133 shellquote = platform.shellquote
131 134 spawndetached = platform.spawndetached
132 135 split = platform.split
133 136 sshargs = platform.sshargs
134 137 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
135 138 statisexec = platform.statisexec
136 139 statislink = platform.statislink
137 140 testpid = platform.testpid
138 141 umask = platform.umask
139 142 unlink = platform.unlink
140 143 username = platform.username
141 144
142 145 # Python compatibility
143 146
144 147 _notset = object()
145 148
146 149 # disable Python's problematic floating point timestamps (issue4836)
147 150 # (Python hypocritically says you shouldn't change this behavior in
148 151 # libraries, and sure enough Mercurial is not a library.)
149 152 os.stat_float_times(False)
150 153
151 154 def safehasattr(thing, attr):
152 155 return getattr(thing, attr, _notset) is not _notset
153 156
154 157 def bitsfrom(container):
155 158 bits = 0
156 159 for bit in container:
157 160 bits |= bit
158 161 return bits
159 162
160 163 # python 2.6 still have deprecation warning enabled by default. We do not want
161 164 # to display anything to standard user so detect if we are running test and
162 165 # only use python deprecation warning in this case.
163 166 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
164 167 if _dowarn:
165 168 # explicitly unfilter our warning for python 2.7
166 169 #
167 170 # The option of setting PYTHONWARNINGS in the test runner was investigated.
168 171 # However, module name set through PYTHONWARNINGS was exactly matched, so
169 172 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
170 173 # makes the whole PYTHONWARNINGS thing useless for our usecase.
171 174 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
172 175 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
173 176 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
174 177
175 178 def nouideprecwarn(msg, version, stacklevel=1):
176 179 """Issue an python native deprecation warning
177 180
178 181 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
179 182 """
180 183 if _dowarn:
181 184 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
182 185 " update your code.)") % version
183 186 warnings.warn(msg, DeprecationWarning, stacklevel + 1)
184 187
185 188 DIGESTS = {
186 189 'md5': hashlib.md5,
187 190 'sha1': hashlib.sha1,
188 191 'sha512': hashlib.sha512,
189 192 }
190 193 # List of digest types from strongest to weakest
191 194 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
192 195
193 196 for k in DIGESTS_BY_STRENGTH:
194 197 assert k in DIGESTS
195 198
196 199 class digester(object):
197 200 """helper to compute digests.
198 201
199 202 This helper can be used to compute one or more digests given their name.
200 203
201 204 >>> d = digester(['md5', 'sha1'])
202 205 >>> d.update('foo')
203 206 >>> [k for k in sorted(d)]
204 207 ['md5', 'sha1']
205 208 >>> d['md5']
206 209 'acbd18db4cc2f85cedef654fccc4a4d8'
207 210 >>> d['sha1']
208 211 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
209 212 >>> digester.preferred(['md5', 'sha1'])
210 213 'sha1'
211 214 """
212 215
213 216 def __init__(self, digests, s=''):
214 217 self._hashes = {}
215 218 for k in digests:
216 219 if k not in DIGESTS:
217 220 raise Abort(_('unknown digest type: %s') % k)
218 221 self._hashes[k] = DIGESTS[k]()
219 222 if s:
220 223 self.update(s)
221 224
222 225 def update(self, data):
223 226 for h in self._hashes.values():
224 227 h.update(data)
225 228
226 229 def __getitem__(self, key):
227 230 if key not in DIGESTS:
228 231 raise Abort(_('unknown digest type: %s') % k)
229 232 return self._hashes[key].hexdigest()
230 233
231 234 def __iter__(self):
232 235 return iter(self._hashes)
233 236
234 237 @staticmethod
235 238 def preferred(supported):
236 239 """returns the strongest digest type in both supported and DIGESTS."""
237 240
238 241 for k in DIGESTS_BY_STRENGTH:
239 242 if k in supported:
240 243 return k
241 244 return None
242 245
243 246 class digestchecker(object):
244 247 """file handle wrapper that additionally checks content against a given
245 248 size and digests.
246 249
247 250 d = digestchecker(fh, size, {'md5': '...'})
248 251
249 252 When multiple digests are given, all of them are validated.
250 253 """
251 254
252 255 def __init__(self, fh, size, digests):
253 256 self._fh = fh
254 257 self._size = size
255 258 self._got = 0
256 259 self._digests = dict(digests)
257 260 self._digester = digester(self._digests.keys())
258 261
259 262 def read(self, length=-1):
260 263 content = self._fh.read(length)
261 264 self._digester.update(content)
262 265 self._got += len(content)
263 266 return content
264 267
265 268 def validate(self):
266 269 if self._size != self._got:
267 270 raise Abort(_('size mismatch: expected %d, got %d') %
268 271 (self._size, self._got))
269 272 for k, v in self._digests.items():
270 273 if v != self._digester[k]:
271 274 # i18n: first parameter is a digest name
272 275 raise Abort(_('%s mismatch: expected %s, got %s') %
273 276 (k, v, self._digester[k]))
274 277
275 278 try:
276 279 buffer = buffer
277 280 except NameError:
278 281 if not pycompat.ispy3:
279 282 def buffer(sliceable, offset=0, length=None):
280 283 if length is not None:
281 284 return sliceable[offset:offset + length]
282 285 return sliceable[offset:]
283 286 else:
284 287 def buffer(sliceable, offset=0, length=None):
285 288 if length is not None:
286 289 return memoryview(sliceable)[offset:offset + length]
287 290 return memoryview(sliceable)[offset:]
288 291
289 292 closefds = pycompat.osname == 'posix'
290 293
291 294 _chunksize = 4096
292 295
293 296 class bufferedinputpipe(object):
294 297 """a manually buffered input pipe
295 298
296 299 Python will not let us use buffered IO and lazy reading with 'polling' at
297 300 the same time. We cannot probe the buffer state and select will not detect
298 301 that data are ready to read if they are already buffered.
299 302
300 303 This class lets us work around that by implementing its own buffering
301 304 (allowing efficient readline) while offering a way to know if the buffer is
302 305 empty from the output (allowing collaboration of the buffer with polling).
303 306
304 307 This class lives in the 'util' module because it makes use of the 'os'
305 308 module from the python stdlib.
306 309 """
307 310
308 311 def __init__(self, input):
309 312 self._input = input
310 313 self._buffer = []
311 314 self._eof = False
312 315 self._lenbuf = 0
313 316
314 317 @property
315 318 def hasbuffer(self):
316 319 """True is any data is currently buffered
317 320
318 321 This is used externally as a pre-step for polling IO: if data is
319 322 already buffered, no polling should be set in place."""
320 323 return bool(self._buffer)
321 324
322 325 @property
323 326 def closed(self):
324 327 return self._input.closed
325 328
326 329 def fileno(self):
327 330 return self._input.fileno()
328 331
329 332 def close(self):
330 333 return self._input.close()
331 334
332 335 def read(self, size):
333 336 while (not self._eof) and (self._lenbuf < size):
334 337 self._fillbuffer()
335 338 return self._frombuffer(size)
336 339
337 340 def readline(self, *args, **kwargs):
338 341 if 1 < len(self._buffer):
339 342 # this should not happen because both read and readline end with a
340 343 # _frombuffer call that collapses it.
341 344 self._buffer = [''.join(self._buffer)]
342 345 self._lenbuf = len(self._buffer[0])
343 346 lfi = -1
344 347 if self._buffer:
345 348 lfi = self._buffer[-1].find('\n')
346 349 while (not self._eof) and lfi < 0:
347 350 self._fillbuffer()
348 351 if self._buffer:
349 352 lfi = self._buffer[-1].find('\n')
350 353 size = lfi + 1
351 354 if lfi < 0: # end of file
352 355 size = self._lenbuf
353 356 elif 1 < len(self._buffer):
354 357 # we need to take previous chunks into account
355 358 size += self._lenbuf - len(self._buffer[-1])
356 359 return self._frombuffer(size)
357 360
358 361 def _frombuffer(self, size):
359 362 """return at most 'size' data from the buffer
360 363
361 364 The data are removed from the buffer."""
362 365 if size == 0 or not self._buffer:
363 366 return ''
364 367 buf = self._buffer[0]
365 368 if 1 < len(self._buffer):
366 369 buf = ''.join(self._buffer)
367 370
368 371 data = buf[:size]
369 372 buf = buf[len(data):]
370 373 if buf:
371 374 self._buffer = [buf]
372 375 self._lenbuf = len(buf)
373 376 else:
374 377 self._buffer = []
375 378 self._lenbuf = 0
376 379 return data
377 380
378 381 def _fillbuffer(self):
379 382 """read data to the buffer"""
380 383 data = os.read(self._input.fileno(), _chunksize)
381 384 if not data:
382 385 self._eof = True
383 386 else:
384 387 self._lenbuf += len(data)
385 388 self._buffer.append(data)
386 389
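A sketch of the intended collaboration between this buffer and polling (assuming a POSIX system; the helper name is illustrative): consult hasbuffer first, and only select() on the pipe when the buffer is empty, since select() cannot see already-buffered data.

    import select
    import subprocess

    proc = subprocess.Popen(['cat'], stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE)
    pipe = bufferedinputpipe(proc.stdout)

    def readlinewithtimeout(timeout):
        if not pipe.hasbuffer:
            # nothing buffered: poll the underlying fd (the fileno()
            # method above makes the object selectable)
            ready = select.select([pipe], [], [], timeout)[0]
            if not ready:
                return None  # timed out
        return pipe.readline()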
387 390 def popen2(cmd, env=None, newlines=False):
388 391 # Setting bufsize to -1 lets the system decide the buffer size.
389 392 # The default for bufsize is 0, meaning unbuffered. This leads to
390 393 # poor performance on Mac OS X: http://bugs.python.org/issue4194
391 394 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
392 395 close_fds=closefds,
393 396 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
394 397 universal_newlines=newlines,
395 398 env=env)
396 399 return p.stdin, p.stdout
397 400
398 401 def popen3(cmd, env=None, newlines=False):
399 402 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
400 403 return stdin, stdout, stderr
401 404
402 405 def popen4(cmd, env=None, newlines=False, bufsize=-1):
403 406 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
404 407 close_fds=closefds,
405 408 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
406 409 stderr=subprocess.PIPE,
407 410 universal_newlines=newlines,
408 411 env=env)
409 412 return p.stdin, p.stdout, p.stderr, p
410 413
411 414 def version():
412 415 """Return version information if available."""
413 416 try:
414 417 from . import __version__
415 418 return __version__.version
416 419 except ImportError:
417 420 return 'unknown'
418 421
419 422 def versiontuple(v=None, n=4):
420 423 """Parses a Mercurial version string into an N-tuple.
421 424
422 425 The version string to be parsed is specified with the ``v`` argument.
423 426 If it isn't defined, the current Mercurial version string will be parsed.
424 427
425 428 ``n`` can be 2, 3, or 4. Here is how some version strings map to
426 429 returned values:
427 430
428 431 >>> v = '3.6.1+190-df9b73d2d444'
429 432 >>> versiontuple(v, 2)
430 433 (3, 6)
431 434 >>> versiontuple(v, 3)
432 435 (3, 6, 1)
433 436 >>> versiontuple(v, 4)
434 437 (3, 6, 1, '190-df9b73d2d444')
435 438
436 439 >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
437 440 (3, 6, 1, '190-df9b73d2d444+20151118')
438 441
439 442 >>> v = '3.6'
440 443 >>> versiontuple(v, 2)
441 444 (3, 6)
442 445 >>> versiontuple(v, 3)
443 446 (3, 6, None)
444 447 >>> versiontuple(v, 4)
445 448 (3, 6, None, None)
446 449
447 450 >>> v = '3.9-rc'
448 451 >>> versiontuple(v, 2)
449 452 (3, 9)
450 453 >>> versiontuple(v, 3)
451 454 (3, 9, None)
452 455 >>> versiontuple(v, 4)
453 456 (3, 9, None, 'rc')
454 457
455 458 >>> v = '3.9-rc+2-02a8fea4289b'
456 459 >>> versiontuple(v, 2)
457 460 (3, 9)
458 461 >>> versiontuple(v, 3)
459 462 (3, 9, None)
460 463 >>> versiontuple(v, 4)
461 464 (3, 9, None, 'rc+2-02a8fea4289b')
462 465 """
463 466 if not v:
464 467 v = version()
465 468 parts = remod.split('[\+-]', v, 1)
466 469 if len(parts) == 1:
467 470 vparts, extra = parts[0], None
468 471 else:
469 472 vparts, extra = parts
470 473
471 474 vints = []
472 475 for i in vparts.split('.'):
473 476 try:
474 477 vints.append(int(i))
475 478 except ValueError:
476 479 break
477 480 # (3, 6) -> (3, 6, None)
478 481 while len(vints) < 3:
479 482 vints.append(None)
480 483
481 484 if n == 2:
482 485 return (vints[0], vints[1])
483 486 if n == 3:
484 487 return (vints[0], vints[1], vints[2])
485 488 if n == 4:
486 489 return (vints[0], vints[1], vints[2], extra)
487 490
488 491 # used by parsedate
489 492 defaultdateformats = (
490 493 '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
491 494 '%Y-%m-%dT%H:%M', # without seconds
492 495 '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
493 496 '%Y-%m-%dT%H%M', # without seconds
494 497 '%Y-%m-%d %H:%M:%S', # our common legal variant
495 498 '%Y-%m-%d %H:%M', # without seconds
496 499 '%Y-%m-%d %H%M%S', # without :
497 500 '%Y-%m-%d %H%M', # without seconds
498 501 '%Y-%m-%d %I:%M:%S%p',
499 502 '%Y-%m-%d %H:%M',
500 503 '%Y-%m-%d %I:%M%p',
501 504 '%Y-%m-%d',
502 505 '%m-%d',
503 506 '%m/%d',
504 507 '%m/%d/%y',
505 508 '%m/%d/%Y',
506 509 '%a %b %d %H:%M:%S %Y',
507 510 '%a %b %d %I:%M:%S%p %Y',
508 511 '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
509 512 '%b %d %H:%M:%S %Y',
510 513 '%b %d %I:%M:%S%p %Y',
511 514 '%b %d %H:%M:%S',
512 515 '%b %d %I:%M:%S%p',
513 516 '%b %d %H:%M',
514 517 '%b %d %I:%M%p',
515 518 '%b %d %Y',
516 519 '%b %d',
517 520 '%H:%M:%S',
518 521 '%I:%M:%S%p',
519 522 '%H:%M',
520 523 '%I:%M%p',
521 524 )
522 525
523 526 extendeddateformats = defaultdateformats + (
524 527 "%Y",
525 528 "%Y-%m",
526 529 "%b",
527 530 "%b %Y",
528 531 )
529 532
530 533 def cachefunc(func):
531 534 '''cache the result of function calls'''
532 535 # XXX doesn't handle keywords args
533 536 if func.__code__.co_argcount == 0:
534 537 cache = []
535 538 def f():
536 539 if len(cache) == 0:
537 540 cache.append(func())
538 541 return cache[0]
539 542 return f
540 543 cache = {}
541 544 if func.__code__.co_argcount == 1:
542 545 # we gain a small amount of time because
543 546 # we don't need to pack/unpack the list
544 547 def f(arg):
545 548 if arg not in cache:
546 549 cache[arg] = func(arg)
547 550 return cache[arg]
548 551 else:
549 552 def f(*args):
550 553 if args not in cache:
551 554 cache[args] = func(*args)
552 555 return cache[args]
553 556
554 557 return f
555 558
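A small usage sketch of cachefunc (the function below is illustrative):

    calls = []

    @cachefunc
    def square(x):
        calls.append(x)
        return x * x

    square(3); square(3); square(4)
    assert calls == [3, 4]  # the repeated call was served from the cache

Note the cache is unbounded; lrucachefunc below is the size-capped variant.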
556 559 class sortdict(dict):
557 560 '''a simple sorted dictionary'''
558 561 def __init__(self, data=None):
559 562 self._list = []
560 563 if data:
561 564 self.update(data)
562 565 def copy(self):
563 566 return sortdict(self)
564 567 def __setitem__(self, key, val):
565 568 if key in self:
566 569 self._list.remove(key)
567 570 self._list.append(key)
568 571 dict.__setitem__(self, key, val)
569 572 def __iter__(self):
570 573 return self._list.__iter__()
571 574 def update(self, src):
572 575 if isinstance(src, dict):
573 576 src = src.iteritems()
574 577 for k, v in src:
575 578 self[k] = v
576 579 def clear(self):
577 580 dict.clear(self)
578 581 self._list = []
579 582 def items(self):
580 583 return [(k, self[k]) for k in self._list]
581 584 def __delitem__(self, key):
582 585 dict.__delitem__(self, key)
583 586 self._list.remove(key)
584 587 def pop(self, key, *args, **kwargs):
585 588 try:
586 589 self._list.remove(key)
587 590 except ValueError:
588 591 pass
589 592 return dict.pop(self, key, *args, **kwargs)
590 593 def keys(self):
591 594 return self._list[:]
592 595 def iterkeys(self):
593 596 return self._list.__iter__()
594 597 def iteritems(self):
595 598 for k in self._list:
596 599 yield k, self[k]
597 600 def insert(self, index, key, val):
598 601 self._list.insert(index, key)
599 602 dict.__setitem__(self, key, val)
600 603 def __repr__(self):
601 604 if not self:
602 605 return '%s()' % self.__class__.__name__
603 606 return '%s(%r)' % (self.__class__.__name__, self.items())
604 607
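sortdict keeps keys in insertion order, and re-assigning an existing key moves it to the end; for example:

    d = sortdict()
    d['a'] = 1
    d['b'] = 2
    d['a'] = 3                                # 'a' moves to the end
    assert d.keys() == ['b', 'a']
    assert d.items() == [('b', 2), ('a', 3)]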
605 608 class _lrucachenode(object):
606 609 """A node in a doubly linked list.
607 610
608 611 Holds a reference to nodes on either side as well as a key-value
609 612 pair for the dictionary entry.
610 613 """
611 614 __slots__ = (u'next', u'prev', u'key', u'value')
612 615
613 616 def __init__(self):
614 617 self.next = None
615 618 self.prev = None
616 619
617 620 self.key = _notset
618 621 self.value = None
619 622
620 623 def markempty(self):
621 624 """Mark the node as emptied."""
622 625 self.key = _notset
623 626
624 627 class lrucachedict(object):
625 628 """Dict that caches most recent accesses and sets.
626 629
627 630 The dict consists of an actual backing dict - indexed by original
628 631 key - and a doubly linked circular list defining the order of entries in
629 632 the cache.
630 633
631 634 The head node is the newest entry in the cache. If the cache is full,
632 635 we recycle head.prev and make it the new head. Cache accesses result in
633 636 the node being moved to before the existing head and being marked as the
634 637 new head node.
635 638 """
636 639 def __init__(self, max):
637 640 self._cache = {}
638 641
639 642 self._head = head = _lrucachenode()
640 643 head.prev = head
641 644 head.next = head
642 645 self._size = 1
643 646 self._capacity = max
644 647
645 648 def __len__(self):
646 649 return len(self._cache)
647 650
648 651 def __contains__(self, k):
649 652 return k in self._cache
650 653
651 654 def __iter__(self):
652 655 # We don't have to iterate in cache order, but why not.
653 656 n = self._head
654 657 for i in range(len(self._cache)):
655 658 yield n.key
656 659 n = n.next
657 660
658 661 def __getitem__(self, k):
659 662 node = self._cache[k]
660 663 self._movetohead(node)
661 664 return node.value
662 665
663 666 def __setitem__(self, k, v):
664 667 node = self._cache.get(k)
665 668 # Replace existing value and mark as newest.
666 669 if node is not None:
667 670 node.value = v
668 671 self._movetohead(node)
669 672 return
670 673
671 674 if self._size < self._capacity:
672 675 node = self._addcapacity()
673 676 else:
674 677 # Grab the last/oldest item.
675 678 node = self._head.prev
676 679
677 680 # At capacity. Kill the old entry.
678 681 if node.key is not _notset:
679 682 del self._cache[node.key]
680 683
681 684 node.key = k
682 685 node.value = v
683 686 self._cache[k] = node
684 687 # And mark it as newest entry. No need to adjust order since it
685 688 # is already self._head.prev.
686 689 self._head = node
687 690
688 691 def __delitem__(self, k):
689 692 node = self._cache.pop(k)
690 693 node.markempty()
691 694
692 695 # Temporarily mark as newest item before re-adjusting head to make
693 696 # this node the oldest item.
694 697 self._movetohead(node)
695 698 self._head = node.next
696 699
697 700 # Additional dict methods.
698 701
699 702 def get(self, k, default=None):
700 703 try:
701 704 return self._cache[k].value
702 705 except KeyError:
703 706 return default
704 707
705 708 def clear(self):
706 709 n = self._head
707 710 while n.key is not _notset:
708 711 n.markempty()
709 712 n = n.next
710 713
711 714 self._cache.clear()
712 715
713 716 def copy(self):
714 717 result = lrucachedict(self._capacity)
715 718 n = self._head.prev
716 719 # Iterate in oldest-to-newest order, so the copy has the right ordering
717 720 for i in range(len(self._cache)):
718 721 result[n.key] = n.value
719 722 n = n.prev
720 723 return result
721 724
722 725 def _movetohead(self, node):
723 726 """Mark a node as the newest, making it the new head.
724 727
725 728 When a node is accessed, it becomes the freshest entry in the LRU
726 729 list, which is denoted by self._head.
727 730
728 731 Visually, let's make ``N`` the new head node (* denotes head):
729 732
730 733 previous/oldest <-> head <-> next/next newest
731 734
732 735 ----<->--- A* ---<->-----
733 736 | |
734 737 E <-> D <-> N <-> C <-> B
735 738
736 739 To:
737 740
738 741 ----<->--- N* ---<->-----
739 742 | |
740 743 E <-> D <-> C <-> B <-> A
741 744
742 745 This requires the following moves:
743 746
744 747 C.next = D (node.prev.next = node.next)
745 748 D.prev = C (node.next.prev = node.prev)
746 749 E.next = N (head.prev.next = node)
747 750 N.prev = E (node.prev = head.prev)
748 751 N.next = A (node.next = head)
749 752 A.prev = N (head.prev = node)
750 753 """
751 754 head = self._head
752 755 # C.next = D
753 756 node.prev.next = node.next
754 757 # D.prev = C
755 758 node.next.prev = node.prev
756 759 # N.prev = E
757 760 node.prev = head.prev
758 761 # N.next = A
759 762 # It is tempting to do just "head" here, however if node is
760 763 # adjacent to head, this will do bad things.
761 764 node.next = head.prev.next
762 765 # E.next = N
763 766 node.next.prev = node
764 767 # A.prev = N
765 768 node.prev.next = node
766 769
767 770 self._head = node
768 771
769 772 def _addcapacity(self):
770 773 """Add a node to the circular linked list.
771 774
772 775 The new node is inserted before the head node.
773 776 """
774 777 head = self._head
775 778 node = _lrucachenode()
776 779 head.prev.next = node
777 780 node.prev = head.prev
778 781 node.next = head
779 782 head.prev = node
780 783 self._size += 1
781 784 return node
782 785
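A short usage sketch of the LRU behavior described above:

    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']        # access 'a', so 'b' becomes the oldest entry
    d['c'] = 3    # at capacity: evicts 'b', the least recently used
    assert 'b' not in d
    assert 'a' in d and 'c' in d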
783 786 def lrucachefunc(func):
784 787 '''cache most recent results of function calls'''
785 788 cache = {}
786 789 order = collections.deque()
787 790 if func.__code__.co_argcount == 1:
788 791 def f(arg):
789 792 if arg not in cache:
790 793 if len(cache) > 20:
791 794 del cache[order.popleft()]
792 795 cache[arg] = func(arg)
793 796 else:
794 797 order.remove(arg)
795 798 order.append(arg)
796 799 return cache[arg]
797 800 else:
798 801 def f(*args):
799 802 if args not in cache:
800 803 if len(cache) > 20:
801 804 del cache[order.popleft()]
802 805 cache[args] = func(*args)
803 806 else:
804 807 order.remove(args)
805 808 order.append(args)
806 809 return cache[args]
807 810
808 811 return f
809 812
810 813 class propertycache(object):
811 814 def __init__(self, func):
812 815 self.func = func
813 816 self.name = func.__name__
814 817 def __get__(self, obj, type=None):
815 818 result = self.func(obj)
816 819 self.cachevalue(obj, result)
817 820 return result
818 821
819 822 def cachevalue(self, obj, value):
820 823 # __dict__ assignment required to bypass __setattr__ (eg: repoview)
821 824 obj.__dict__[self.name] = value
822 825
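propertycache is a non-data descriptor: the first access computes the value and stores it in the instance __dict__, which shadows the descriptor on later lookups. A sketch (the class and attribute here are illustrative):

    class repo(object):
        @propertycache
        def expensive(self):
            print('computing')
            return 42

    r = repo()
    r.expensive                  # prints 'computing', returns 42
    r.expensive                  # served from r.__dict__, no recompute
    del r.__dict__['expensive']  # invalidate; next access recomputes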
823 826 def pipefilter(s, cmd):
824 827 '''filter string S through command CMD, returning its output'''
825 828 p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
826 829 stdin=subprocess.PIPE, stdout=subprocess.PIPE)
827 830 pout, perr = p.communicate(s)
828 831 return pout
829 832
830 833 def tempfilter(s, cmd):
831 834 '''filter string S through a pair of temporary files with CMD.
832 835 CMD is used as a template to create the real command to be run,
833 836 with the strings INFILE and OUTFILE replaced by the real names of
834 837 the temporary files generated.'''
835 838 inname, outname = None, None
836 839 try:
837 840 infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
838 841 fp = os.fdopen(infd, pycompat.sysstr('wb'))
839 842 fp.write(s)
840 843 fp.close()
841 844 outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
842 845 os.close(outfd)
843 846 cmd = cmd.replace('INFILE', inname)
844 847 cmd = cmd.replace('OUTFILE', outname)
845 848 code = os.system(cmd)
846 849 if pycompat.sysplatform == 'OpenVMS' and code & 1:
847 850 code = 0
848 851 if code:
849 852 raise Abort(_("command '%s' failed: %s") %
850 853 (cmd, explainexit(code)))
851 854 return readfile(outname)
852 855 finally:
853 856 try:
854 857 if inname:
855 858 os.unlink(inname)
856 859 except OSError:
857 860 pass
858 861 try:
859 862 if outname:
860 863 os.unlink(outname)
861 864 except OSError:
862 865 pass
863 866
864 867 filtertable = {
865 868 'tempfile:': tempfilter,
866 869 'pipe:': pipefilter,
867 870 }
868 871
869 872 def filter(s, cmd):
870 873 "filter a string through a command that transforms its input to its output"
871 874 for name, fn in filtertable.iteritems():
872 875 if cmd.startswith(name):
873 876 return fn(s, cmd[len(name):].lstrip())
874 877 return pipefilter(s, cmd)
875 878
876 879 def binary(s):
877 880 """return true if a string is binary data"""
878 881 return bool(s and '\0' in s)
879 882
880 883 def increasingchunks(source, min=1024, max=65536):
881 884 '''return no less than min bytes per chunk while data remains,
882 885 doubling min after each chunk until it reaches max'''
883 886 def log2(x):
884 887 if not x:
885 888 return 0
886 889 i = 0
887 890 while x:
888 891 x >>= 1
889 892 i += 1
890 893 return i - 1
891 894
892 895 buf = []
893 896 blen = 0
894 897 for chunk in source:
895 898 buf.append(chunk)
896 899 blen += len(chunk)
897 900 if blen >= min:
898 901 if min < max:
899 902 min = min << 1
900 903 nmin = 1 << log2(blen)
901 904 if nmin > min:
902 905 min = nmin
903 906 if min > max:
904 907 min = max
905 908 yield ''.join(buf)
906 909 blen = 0
907 910 buf = []
908 911 if buf:
909 912 yield ''.join(buf)
910 913
911 914 Abort = error.Abort
912 915
913 916 def always(fn):
914 917 return True
915 918
916 919 def never(fn):
917 920 return False
918 921
919 922 def nogc(func):
920 923 """disable garbage collector
921 924
922 925 Python's garbage collector triggers a GC each time a certain number of
923 926 container objects (the number being defined by gc.get_threshold()) are
924 927 allocated even when marked not to be tracked by the collector. Tracking has
925 928 no effect on when GCs are triggered, only on what objects the GC looks
926 929 into. As a workaround, disable GC while building complex (huge)
927 930 containers.
928 931
929 932 This garbage collector issue has been fixed in Python 2.7.
930 933 """
931 934 if sys.version_info >= (2, 7):
932 935 return func
933 936 def wrapper(*args, **kwargs):
934 937 gcenabled = gc.isenabled()
935 938 gc.disable()
936 939 try:
937 940 return func(*args, **kwargs)
938 941 finally:
939 942 if gcenabled:
940 943 gc.enable()
941 944 return wrapper
942 945
943 946 def pathto(root, n1, n2):
944 947 '''return the relative path from one place to another.
945 948 root should use os.sep to separate directories
946 949 n1 should use os.sep to separate directories
947 950 n2 should use "/" to separate directories
948 951 returns an os.sep-separated path.
949 952
950 953 If n1 is a relative path, it's assumed it's
951 954 relative to root.
952 955 n2 should always be relative to root.
953 956 '''
954 957 if not n1:
955 958 return localpath(n2)
956 959 if os.path.isabs(n1):
957 960 if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
958 961 return os.path.join(root, localpath(n2))
959 962 n2 = '/'.join((pconvert(root), n2))
960 963 a, b = splitpath(n1), n2.split('/')
961 964 a.reverse()
962 965 b.reverse()
963 966 while a and b and a[-1] == b[-1]:
964 967 a.pop()
965 968 b.pop()
966 969 b.reverse()
967 970 return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
968 971
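For example, on a POSIX box (Windows would use '\\' in the os.sep-separated arguments and result):

    >>> pathto('/repo', '/repo/a/b', 'c/d')
    '../../c/d'
    >>> pathto('/repo', '', 'c/d')
    'c/d'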
969 972 def mainfrozen():
970 973 """return True if we are a frozen executable.
971 974
972 975 The code supports py2exe (most common, Windows only) and tools/freeze
973 976 (portable, not much used).
974 977 """
975 978 return (safehasattr(sys, "frozen") or # new py2exe
976 979 safehasattr(sys, "importers") or # old py2exe
977 980 imp.is_frozen(u"__main__")) # tools/freeze
978 981
979 982 # the location of data files matching the source code
980 983 if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
981 984 # executable version (py2exe) doesn't support __file__
982 985 datapath = os.path.dirname(pycompat.sysexecutable)
983 986 else:
984 987 datapath = os.path.dirname(pycompat.fsencode(__file__))
985 988
986 989 i18n.setdatapath(datapath)
987 990
988 991 _hgexecutable = None
989 992
990 993 def hgexecutable():
991 994 """return location of the 'hg' executable.
992 995
993 996 Defaults to $HG or 'hg' in the search path.
994 997 """
995 998 if _hgexecutable is None:
996 999 hg = encoding.environ.get('HG')
997 1000 mainmod = sys.modules[pycompat.sysstr('__main__')]
998 1001 if hg:
999 1002 _sethgexecutable(hg)
1000 1003 elif mainfrozen():
1001 1004 if getattr(sys, 'frozen', None) == 'macosx_app':
1002 1005 # Env variable set by py2app
1003 1006 _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
1004 1007 else:
1005 1008 _sethgexecutable(pycompat.sysexecutable)
1006 1009 elif (os.path.basename(
1007 1010 pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
1008 1011 _sethgexecutable(pycompat.fsencode(mainmod.__file__))
1009 1012 else:
1010 1013 exe = findexe('hg') or os.path.basename(sys.argv[0])
1011 1014 _sethgexecutable(exe)
1012 1015 return _hgexecutable
1013 1016
1014 1017 def _sethgexecutable(path):
1015 1018 """set location of the 'hg' executable"""
1016 1019 global _hgexecutable
1017 1020 _hgexecutable = path
1018 1021
1019 1022 def _isstdout(f):
1020 1023 fileno = getattr(f, 'fileno', None)
1021 1024 return fileno and fileno() == sys.__stdout__.fileno()
1022 1025
1023 1026 def shellenviron(environ=None):
1024 1027 """return environ with optional override, useful for shelling out"""
1025 1028 def py2shell(val):
1026 1029 'convert python object into string that is useful to shell'
1027 1030 if val is None or val is False:
1028 1031 return '0'
1029 1032 if val is True:
1030 1033 return '1'
1031 1034 return str(val)
1032 1035 env = dict(encoding.environ)
1033 1036 if environ:
1034 1037 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1035 1038 env['HG'] = hgexecutable()
1036 1039 return env
1037 1040
1038 1041 def system(cmd, environ=None, cwd=None, out=None):
1039 1042 '''enhanced shell command execution.
1040 1043 run with environment maybe modified, maybe in different dir.
1041 1044
1042 1045 if out is specified, it is assumed to be a file-like object that has a
1043 1046 write() method. stdout and stderr will be redirected to out.'''
1044 1047 try:
1045 1048 stdout.flush()
1046 1049 except Exception:
1047 1050 pass
1048 1051 cmd = quotecommand(cmd)
1049 1052 if pycompat.sysplatform == 'plan9' and (sys.version_info[0] == 2
1050 1053 and sys.version_info[1] < 7):
1051 1054 # subprocess kludge to work around issues in half-baked Python
1052 1055 # ports, notably bichued/python:
1053 1056 if not cwd is None:
1054 1057 os.chdir(cwd)
1055 1058 rc = os.system(cmd)
1056 1059 else:
1057 1060 env = shellenviron(environ)
1058 1061 if out is None or _isstdout(out):
1059 1062 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1060 1063 env=env, cwd=cwd)
1061 1064 else:
1062 1065 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1063 1066 env=env, cwd=cwd, stdout=subprocess.PIPE,
1064 1067 stderr=subprocess.STDOUT)
1065 1068 for line in iter(proc.stdout.readline, ''):
1066 1069 out.write(line)
1067 1070 proc.wait()
1068 1071 rc = proc.returncode
1069 1072 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1070 1073 rc = 0
1071 1074 return rc
1072 1075
1073 1076 def checksignature(func):
1074 1077 '''wrap a function with code to check for calling errors'''
1075 1078 def check(*args, **kwargs):
1076 1079 try:
1077 1080 return func(*args, **kwargs)
1078 1081 except TypeError:
1079 1082 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1080 1083 raise error.SignatureError
1081 1084 raise
1082 1085
1083 1086 return check
1084 1087
1085 1088 # a whitelist of known filesystems where hardlinks work reliably
1086 1089 _hardlinkfswhitelist = set([
1087 1090 'btrfs',
1088 1091 'ext2',
1089 1092 'ext3',
1090 1093 'ext4',
1091 1094 'hfs',
1092 1095 'jfs',
1093 1096 'reiserfs',
1094 1097 'tmpfs',
1095 1098 'ufs',
1096 1099 'xfs',
1097 1100 'zfs',
1098 1101 ])
1099 1102
1100 1103 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1101 1104 '''copy a file, preserving mode and optionally other stat info like
1102 1105 atime/mtime
1103 1106
1104 1107 checkambig argument is used with filestat, and is useful only if
1105 1108 destination file is guarded by any lock (e.g. repo.lock or
1106 1109 repo.wlock).
1107 1110
1108 1111 copystat and checkambig should be exclusive.
1109 1112 '''
1110 1113 assert not (copystat and checkambig)
1111 1114 oldstat = None
1112 1115 if os.path.lexists(dest):
1113 1116 if checkambig:
1114 1117 oldstat = checkambig and filestat(dest)
1115 1118 unlink(dest)
1116 1119 if hardlink:
1117 1120 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1118 1121 # unless we are confident that dest is on a whitelisted filesystem.
1119 1122 try:
1120 1123 fstype = getfstype(os.path.dirname(dest))
1121 1124 except OSError:
1122 1125 fstype = None
1123 1126 if fstype not in _hardlinkfswhitelist:
1124 1127 hardlink = False
1125 1128 if hardlink:
1126 1129 try:
1127 1130 oslink(src, dest)
1128 1131 return
1129 1132 except (IOError, OSError):
1130 1133 pass # fall back to normal copy
1131 1134 if os.path.islink(src):
1132 1135 os.symlink(os.readlink(src), dest)
1133 1136 # copytime is ignored for symlinks, but in general copytime isn't needed
1134 1137 # for them anyway
1135 1138 else:
1136 1139 try:
1137 1140 shutil.copyfile(src, dest)
1138 1141 if copystat:
1139 1142 # copystat also copies mode
1140 1143 shutil.copystat(src, dest)
1141 1144 else:
1142 1145 shutil.copymode(src, dest)
1143 1146 if oldstat and oldstat.stat:
1144 1147 newstat = filestat(dest)
1145 1148 if newstat.isambig(oldstat):
1146 1149 # stat of copied file is ambiguous to original one
1147 1150 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1148 1151 os.utime(dest, (advanced, advanced))
1149 1152 except shutil.Error as inst:
1150 1153 raise Abort(str(inst))
1151 1154
1152 1155 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1153 1156 """Copy a directory tree using hardlinks if possible."""
1154 1157 num = 0
1155 1158
1156 1159 gettopic = lambda: hardlink and _('linking') or _('copying')
1157 1160
1158 1161 if os.path.isdir(src):
1159 1162 if hardlink is None:
1160 1163 hardlink = (os.stat(src).st_dev ==
1161 1164 os.stat(os.path.dirname(dst)).st_dev)
1162 1165 topic = gettopic()
1163 1166 os.mkdir(dst)
1164 1167 for name, kind in osutil.listdir(src):
1165 1168 srcname = os.path.join(src, name)
1166 1169 dstname = os.path.join(dst, name)
1167 1170 def nprog(t, pos):
1168 1171 if pos is not None:
1169 1172 return progress(t, pos + num)
1170 1173 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1171 1174 num += n
1172 1175 else:
1173 1176 if hardlink is None:
1174 1177 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1175 1178 os.stat(os.path.dirname(dst)).st_dev)
1176 1179 topic = gettopic()
1177 1180
1178 1181 if hardlink:
1179 1182 try:
1180 1183 oslink(src, dst)
1181 1184 except (IOError, OSError):
1182 1185 hardlink = False
1183 1186 shutil.copy(src, dst)
1184 1187 else:
1185 1188 shutil.copy(src, dst)
1186 1189 num += 1
1187 1190 progress(topic, num)
1188 1191 progress(topic, None)
1189 1192
1190 1193 return hardlink, num
1191 1194
1192 1195 _winreservednames = '''con prn aux nul
1193 1196 com1 com2 com3 com4 com5 com6 com7 com8 com9
1194 1197 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1195 1198 _winreservedchars = ':*?"<>|'
1196 1199 def checkwinfilename(path):
1197 1200 r'''Check that the base-relative path is a valid filename on Windows.
1198 1201 Returns None if the path is ok, or a UI string describing the problem.
1199 1202
1200 1203 >>> checkwinfilename("just/a/normal/path")
1201 1204 >>> checkwinfilename("foo/bar/con.xml")
1202 1205 "filename contains 'con', which is reserved on Windows"
1203 1206 >>> checkwinfilename("foo/con.xml/bar")
1204 1207 "filename contains 'con', which is reserved on Windows"
1205 1208 >>> checkwinfilename("foo/bar/xml.con")
1206 1209 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1207 1210 "filename contains 'AUX', which is reserved on Windows"
1208 1211 >>> checkwinfilename("foo/bar/bla:.txt")
1209 1212 "filename contains ':', which is reserved on Windows"
1210 1213 >>> checkwinfilename("foo/bar/b\07la.txt")
1211 1214 "filename contains '\\x07', which is invalid on Windows"
1212 1215 >>> checkwinfilename("foo/bar/bla ")
1213 1216 "filename ends with ' ', which is not allowed on Windows"
1214 1217 >>> checkwinfilename("../bar")
1215 1218 >>> checkwinfilename("foo\\")
1216 1219 "filename ends with '\\', which is invalid on Windows"
1217 1220 >>> checkwinfilename("foo\\/bar")
1218 1221 "directory name ends with '\\', which is invalid on Windows"
1219 1222 '''
1220 1223 if path.endswith('\\'):
1221 1224 return _("filename ends with '\\', which is invalid on Windows")
1222 1225 if '\\/' in path:
1223 1226 return _("directory name ends with '\\', which is invalid on Windows")
1224 1227 for n in path.replace('\\', '/').split('/'):
1225 1228 if not n:
1226 1229 continue
1227 for c in pycompat.bytestr(n):
1230 for c in _filenamebytestr(n):
1228 1231 if c in _winreservedchars:
1229 1232 return _("filename contains '%s', which is reserved "
1230 1233 "on Windows") % c
1231 1234 if ord(c) <= 31:
1232 1235 return _("filename contains %r, which is invalid "
1233 1236 "on Windows") % c
1234 1237 base = n.split('.')[0]
1235 1238 if base and base.lower() in _winreservednames:
1236 1239 return _("filename contains '%s', which is reserved "
1237 1240 "on Windows") % base
1238 1241 t = n[-1]
1239 1242 if t in '. ' and n not in '..':
1240 1243 return _("filename ends with '%s', which is not allowed "
1241 1244 "on Windows") % t
1242 1245
1243 1246 if pycompat.osname == 'nt':
1244 1247 checkosfilename = checkwinfilename
1245 1248 timer = time.clock
1246 1249 else:
1247 1250 checkosfilename = platform.checkosfilename
1248 1251 timer = time.time
1249 1252
1250 1253 if safehasattr(time, "perf_counter"):
1251 1254 timer = time.perf_counter
1252 1255
1253 1256 def makelock(info, pathname):
1254 1257 try:
1255 1258 return os.symlink(info, pathname)
1256 1259 except OSError as why:
1257 1260 if why.errno == errno.EEXIST:
1258 1261 raise
1259 1262 except AttributeError: # no symlink in os
1260 1263 pass
1261 1264
1262 1265 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1263 1266 os.write(ld, info)
1264 1267 os.close(ld)
1265 1268
1266 1269 def readlock(pathname):
1267 1270 try:
1268 1271 return os.readlink(pathname)
1269 1272 except OSError as why:
1270 1273 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1271 1274 raise
1272 1275 except AttributeError: # no symlink in os
1273 1276 pass
1274 1277 fp = posixfile(pathname)
1275 1278 r = fp.read()
1276 1279 fp.close()
1277 1280 return r
1278 1281
1279 1282 def fstat(fp):
1280 1283 '''stat file object that may not have fileno method.'''
1281 1284 try:
1282 1285 return os.fstat(fp.fileno())
1283 1286 except AttributeError:
1284 1287 return os.stat(fp.name)
1285 1288
1286 1289 # File system features
1287 1290
1288 1291 def fscasesensitive(path):
1289 1292 """
1290 1293 Return true if the given path is on a case-sensitive filesystem
1291 1294
1292 1295 Requires a path (like /foo/.hg) ending with a foldable final
1293 1296 directory component.
1294 1297 """
1295 1298 s1 = os.lstat(path)
1296 1299 d, b = os.path.split(path)
1297 1300 b2 = b.upper()
1298 1301 if b == b2:
1299 1302 b2 = b.lower()
1300 1303 if b == b2:
1301 1304 return True # no evidence against case sensitivity
1302 1305 p2 = os.path.join(d, b2)
1303 1306 try:
1304 1307 s2 = os.lstat(p2)
1305 1308 if s2 == s1:
1306 1309 return False
1307 1310 return True
1308 1311 except OSError:
1309 1312 return True
1310 1313
1311 1314 try:
1312 1315 import re2
1313 1316 _re2 = None
1314 1317 except ImportError:
1315 1318 _re2 = False
1316 1319
1317 1320 class _re(object):
1318 1321 def _checkre2(self):
1319 1322 global _re2
1320 1323 try:
1321 1324 # check if match works, see issue3964
1322 1325 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1323 1326 except ImportError:
1324 1327 _re2 = False
1325 1328
1326 1329 def compile(self, pat, flags=0):
1327 1330 '''Compile a regular expression, using re2 if possible
1328 1331
1329 1332 For best performance, use only re2-compatible regexp features. The
1330 1333 only flags from the re module that are re2-compatible are
1331 1334 IGNORECASE and MULTILINE.'''
1332 1335 if _re2 is None:
1333 1336 self._checkre2()
1334 1337 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1335 1338 if flags & remod.IGNORECASE:
1336 1339 pat = '(?i)' + pat
1337 1340 if flags & remod.MULTILINE:
1338 1341 pat = '(?m)' + pat
1339 1342 try:
1340 1343 return re2.compile(pat)
1341 1344 except re2.error:
1342 1345 pass
1343 1346 return remod.compile(pat, flags)
1344 1347
1345 1348 @propertycache
1346 1349 def escape(self):
1347 1350 '''Return the version of escape corresponding to self.compile.
1348 1351
1349 1352 This is imperfect because whether re2 or re is used for a particular
1350 1353 function depends on the flags, etc, but it's the best we can do.
1351 1354 '''
1352 1355 global _re2
1353 1356 if _re2 is None:
1354 1357 self._checkre2()
1355 1358 if _re2:
1356 1359 return re2.escape
1357 1360 else:
1358 1361 return remod.escape
1359 1362
1360 1363 re = _re()
1361 1364
1362 1365 _fspathcache = {}
1363 1366 def fspath(name, root):
1364 1367 '''Get name in the case stored in the filesystem
1365 1368
1366 1369 The name should be relative to root, and be normcase-ed for efficiency.
1367 1370
1368 1371 Note that this function is unnecessary, and should not be
1369 1372 called, for case-sensitive filesystems (simply because it's expensive).
1370 1373
1371 1374 The root should be normcase-ed, too.
1372 1375 '''
1373 1376 def _makefspathcacheentry(dir):
1374 1377 return dict((normcase(n), n) for n in os.listdir(dir))
1375 1378
1376 1379 seps = pycompat.ossep
1377 1380 if pycompat.osaltsep:
1378 1381 seps = seps + pycompat.osaltsep
1379 1382 # Protect backslashes. This gets silly very quickly.
1380 1383 seps.replace('\\','\\\\')
1381 1384 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1382 1385 dir = os.path.normpath(root)
1383 1386 result = []
1384 1387 for part, sep in pattern.findall(name):
1385 1388 if sep:
1386 1389 result.append(sep)
1387 1390 continue
1388 1391
1389 1392 if dir not in _fspathcache:
1390 1393 _fspathcache[dir] = _makefspathcacheentry(dir)
1391 1394 contents = _fspathcache[dir]
1392 1395
1393 1396 found = contents.get(part)
1394 1397 if not found:
1395 1398 # retry "once per directory" per "dirstate.walk", which
1396 1399 # may take place for each patch of "hg qpush", for example
1397 1400 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1398 1401 found = contents.get(part)
1399 1402
1400 1403 result.append(found or part)
1401 1404 dir = os.path.join(dir, part)
1402 1405
1403 1406 return ''.join(result)
1404 1407
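A usage sketch, assuming a case-insensitive filesystem on which the file was created as 'Foo/Bar.txt' (values illustrative):

    >>> # both name and root must be normcase-ed by the caller
    >>> fspath(normcase('foo/bar.txt'), normcase('/repo'))
    'Foo/Bar.txt'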
1405 1408 def getfstype(dirpath):
1406 1409 '''Get the filesystem type name from a directory (best-effort)
1407 1410
1408 1411 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1409 1412 '''
1410 1413 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1411 1414
1412 1415 def checknlink(testfile):
1413 1416 '''check whether hardlink count reporting works properly'''
1414 1417
1415 1418 # testfile may be open, so we need a separate file for checking to
1416 1419 # work around issue2543 (or testfile may get lost on Samba shares)
1417 1420 f1 = testfile + ".hgtmp1"
1418 1421 if os.path.lexists(f1):
1419 1422 return False
1420 1423 try:
1421 1424 posixfile(f1, 'w').close()
1422 1425 except IOError:
1423 1426 try:
1424 1427 os.unlink(f1)
1425 1428 except OSError:
1426 1429 pass
1427 1430 return False
1428 1431
1429 1432 f2 = testfile + ".hgtmp2"
1430 1433 fd = None
1431 1434 try:
1432 1435 oslink(f1, f2)
1433 1436 # nlinks() may behave differently for files on Windows shares if
1434 1437 # the file is open.
1435 1438 fd = posixfile(f2)
1436 1439 return nlinks(f2) > 1
1437 1440 except OSError:
1438 1441 return False
1439 1442 finally:
1440 1443 if fd is not None:
1441 1444 fd.close()
1442 1445 for f in (f1, f2):
1443 1446 try:
1444 1447 os.unlink(f)
1445 1448 except OSError:
1446 1449 pass
1447 1450
1448 1451 def endswithsep(path):
1449 1452 '''Check path ends with os.sep or os.altsep.'''
1450 1453 return (path.endswith(pycompat.ossep)
1451 1454 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1452 1455
1453 1456 def splitpath(path):
1454 1457 '''Split path by os.sep.
1455 1458 Note that this function does not use os.altsep because it is
1456 1459 an alternative to a simple "xxx.split(os.sep)".
1457 1460 It is recommended to apply os.path.normpath() before using this
1458 1461 function, if needed.'''
1459 1462 return path.split(pycompat.ossep)
1460 1463
1461 1464 def gui():
1462 1465 '''Are we running in a GUI?'''
1463 1466 if pycompat.sysplatform == 'darwin':
1464 1467 if 'SSH_CONNECTION' in encoding.environ:
1465 1468 # handle SSH access to a box where the user is logged in
1466 1469 return False
1467 1470 elif getattr(osutil, 'isgui', None):
1468 1471 # check if a CoreGraphics session is available
1469 1472 return osutil.isgui()
1470 1473 else:
1471 1474 # pure build; use a safe default
1472 1475 return True
1473 1476 else:
1474 1477 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1475 1478
1476 1479 def mktempcopy(name, emptyok=False, createmode=None):
1477 1480 """Create a temporary file with the same contents from name
1478 1481
1479 1482 The permission bits are copied from the original file.
1480 1483
1481 1484 If the temporary file is going to be truncated immediately, you
1482 1485 can use emptyok=True as an optimization.
1483 1486
1484 1487 Returns the name of the temporary file.
1485 1488 """
1486 1489 d, fn = os.path.split(name)
1487 1490 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1488 1491 os.close(fd)
1489 1492 # Temporary files are created with mode 0600, which is usually not
1490 1493 # what we want. If the original file already exists, just copy
1491 1494 # its mode. Otherwise, manually obey umask.
1492 1495 copymode(name, temp, createmode)
1493 1496 if emptyok:
1494 1497 return temp
1495 1498 try:
1496 1499 try:
1497 1500 ifp = posixfile(name, "rb")
1498 1501 except IOError as inst:
1499 1502 if inst.errno == errno.ENOENT:
1500 1503 return temp
1501 1504 if not getattr(inst, 'filename', None):
1502 1505 inst.filename = name
1503 1506 raise
1504 1507 ofp = posixfile(temp, "wb")
1505 1508 for chunk in filechunkiter(ifp):
1506 1509 ofp.write(chunk)
1507 1510 ifp.close()
1508 1511 ofp.close()
1509 1512 except: # re-raises
1510 1513 try: os.unlink(temp)
1511 1514 except OSError: pass
1512 1515 raise
1513 1516 return temp
1514 1517
1515 1518 class filestat(object):
1516 1519     """helper to exactly detect changes of a file
1517 1520 
1518 1521     The 'stat' attribute is the result of 'os.stat()' if the specified
1519 1522     'path' exists; otherwise it is None. This spares callers a
1520 1523     preparatory 'exists()' check.
1521 1524 """
1522 1525 def __init__(self, path):
1523 1526 try:
1524 1527 self.stat = os.stat(path)
1525 1528 except OSError as err:
1526 1529 if err.errno != errno.ENOENT:
1527 1530 raise
1528 1531 self.stat = None
1529 1532
1530 1533 __hash__ = object.__hash__
1531 1534
1532 1535 def __eq__(self, old):
1533 1536 try:
1534 1537 # if ambiguity between stat of new and old file is
1535 1538 # avoided, comparison of size, ctime and mtime is enough
1536 1539 # to exactly detect change of a file regardless of platform
1537 1540 return (self.stat.st_size == old.stat.st_size and
1538 1541 self.stat.st_ctime == old.stat.st_ctime and
1539 1542 self.stat.st_mtime == old.stat.st_mtime)
1540 1543 except AttributeError:
1541 1544 return False
1542 1545
1543 1546 def isambig(self, old):
1544 1547         """Examine whether the new (= self) stat is ambiguous against the old one
1545 1548
1546 1549 "S[N]" below means stat of a file at N-th change:
1547 1550
1548 1551 - S[n-1].ctime < S[n].ctime: can detect change of a file
1549 1552 - S[n-1].ctime == S[n].ctime
1550 1553 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1551 1554 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1552 1555 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1553 1556 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1554 1557
1555 1558         Case (*2) above means that a file was changed twice or more
1556 1559         within the same second (= S[n-1].ctime), making timestamp
1557 1560         comparison ambiguous.
1558 1561 
1559 1562         The basic idea for avoiding such ambiguity is to "advance
1560 1563         mtime by 1 sec, if the timestamp is ambiguous".
1561 1564 
1562 1565         But advancing mtime only in case (*2) doesn't work as
1563 1566         expected, because a naturally advanced S[n].mtime in case (*1)
1564 1567         might equal a manually advanced S[n-1 or earlier].mtime.
1565 1568 
1566 1569         Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1567 1570         treated as ambiguous regardless of mtime, to avoid overlooking
1568 1571         changes due to collisions between such mtimes.
1569 1572 
1570 1573         Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1571 1574         S[n].mtime", even if the size of the file is unchanged.
1572 1575 """
1573 1576 try:
1574 1577 return (self.stat.st_ctime == old.stat.st_ctime)
1575 1578 except AttributeError:
1576 1579 return False
1577 1580
1578 1581 def avoidambig(self, path, old):
1579 1582 """Change file stat of specified path to avoid ambiguity
1580 1583
1581 1584 'old' should be previous filestat of 'path'.
1582 1585
1583 1586 This skips avoiding ambiguity, if a process doesn't have
1584 1587 appropriate privileges for 'path'.
1585 1588 """
1586 1589 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1587 1590 try:
1588 1591 os.utime(path, (advanced, advanced))
1589 1592 except OSError as inst:
1590 1593 if inst.errno == errno.EPERM:
1591 1594 # utime() on the file created by another user causes EPERM,
1592 1595 # if a process doesn't have appropriate privileges
1593 1596 return
1594 1597 raise
1595 1598
1596 1599 def __ne__(self, other):
1597 1600 return not self == other
1598 1601
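# Illustrative example (not part of the original module): the calling
# pattern implied by the ambiguity discussion above. `path` is a
# hypothetical file that is rewritten while guarded by a lock.
def _demo_filestat(path):
    oldstat = filestat(path)
    # ... rewrite the file at `path`, possibly within the same second ...
    newstat = filestat(path)
    if newstat.isambig(oldstat):
        # advance st_mtime by one second so the next comparison differs
        newstat.avoidambig(path, oldstat)
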
1599 1602 class atomictempfile(object):
1600 1603 '''writable file object that atomically updates a file
1601 1604
1602 1605 All writes will go to a temporary copy of the original file. Call
1603 1606 close() when you are done writing, and atomictempfile will rename
1604 1607 the temporary copy to the original name, making the changes
1605 1608 visible. If the object is destroyed without being closed, all your
1606 1609 writes are discarded.
1607 1610
1608 1611     The checkambig argument of the constructor is used with filestat,
1609 1612     and is useful only if the target file is guarded by a lock
1610 1613     (e.g. repo.lock or repo.wlock).
1611 1614 '''
1612 1615 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1613 1616 self.__name = name # permanent name
1614 1617 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1615 1618 createmode=createmode)
1616 1619 self._fp = posixfile(self._tempname, mode)
1617 1620 self._checkambig = checkambig
1618 1621
1619 1622 # delegated methods
1620 1623 self.read = self._fp.read
1621 1624 self.write = self._fp.write
1622 1625 self.seek = self._fp.seek
1623 1626 self.tell = self._fp.tell
1624 1627 self.fileno = self._fp.fileno
1625 1628
1626 1629 def close(self):
1627 1630 if not self._fp.closed:
1628 1631 self._fp.close()
1629 1632 filename = localpath(self.__name)
1630 1633 oldstat = self._checkambig and filestat(filename)
1631 1634 if oldstat and oldstat.stat:
1632 1635 rename(self._tempname, filename)
1633 1636 newstat = filestat(filename)
1634 1637 if newstat.isambig(oldstat):
1635 1638 # stat of changed file is ambiguous to original one
1636 1639 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1637 1640 os.utime(filename, (advanced, advanced))
1638 1641 else:
1639 1642 rename(self._tempname, filename)
1640 1643
1641 1644 def discard(self):
1642 1645 if not self._fp.closed:
1643 1646 try:
1644 1647 os.unlink(self._tempname)
1645 1648 except OSError:
1646 1649 pass
1647 1650 self._fp.close()
1648 1651
1649 1652 def __del__(self):
1650 1653 if safehasattr(self, '_fp'): # constructor actually did something
1651 1654 self.discard()
1652 1655
1653 1656 def __enter__(self):
1654 1657 return self
1655 1658
1656 1659 def __exit__(self, exctype, excvalue, traceback):
1657 1660 if exctype is not None:
1658 1661 self.discard()
1659 1662 else:
1660 1663 self.close()
1661 1664
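# Illustrative example (not part of the original module): atomictempfile
# as a context manager. The hypothetical `path` either receives the
# complete new content or is left untouched.
def _demo_atomictempfile(path):
    with atomictempfile(path, 'wb') as fp:
        fp.write('all or nothing\n')
    # on a normal exit the temporary copy has been renamed over `path`;
    # on an exception it was discarded instead
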
1662 1665 def unlinkpath(f, ignoremissing=False):
1663 1666 """unlink and remove the directory if it is empty"""
1664 1667 if ignoremissing:
1665 1668 tryunlink(f)
1666 1669 else:
1667 1670 unlink(f)
1668 1671 # try removing directories that might now be empty
1669 1672 try:
1670 1673 removedirs(os.path.dirname(f))
1671 1674 except OSError:
1672 1675 pass
1673 1676
1674 1677 def tryunlink(f):
1675 1678 """Attempt to remove a file, ignoring ENOENT errors."""
1676 1679 try:
1677 1680 unlink(f)
1678 1681 except OSError as e:
1679 1682 if e.errno != errno.ENOENT:
1680 1683 raise
1681 1684
1682 1685 def makedirs(name, mode=None, notindexed=False):
1683 1686 """recursive directory creation with parent mode inheritance
1684 1687
1685 1688 Newly created directories are marked as "not to be indexed by
1686 1689 the content indexing service", if ``notindexed`` is specified
1687 1690 for "write" mode access.
1688 1691 """
1689 1692 try:
1690 1693 makedir(name, notindexed)
1691 1694 except OSError as err:
1692 1695 if err.errno == errno.EEXIST:
1693 1696 return
1694 1697 if err.errno != errno.ENOENT or not name:
1695 1698 raise
1696 1699 parent = os.path.dirname(os.path.abspath(name))
1697 1700 if parent == name:
1698 1701 raise
1699 1702 makedirs(parent, mode, notindexed)
1700 1703 try:
1701 1704 makedir(name, notindexed)
1702 1705 except OSError as err:
1703 1706 # Catch EEXIST to handle races
1704 1707 if err.errno == errno.EEXIST:
1705 1708 return
1706 1709 raise
1707 1710 if mode is not None:
1708 1711 os.chmod(name, mode)
1709 1712
1710 1713 def readfile(path):
1711 1714 with open(path, 'rb') as fp:
1712 1715 return fp.read()
1713 1716
1714 1717 def writefile(path, text):
1715 1718 with open(path, 'wb') as fp:
1716 1719 fp.write(text)
1717 1720
1718 1721 def appendfile(path, text):
1719 1722 with open(path, 'ab') as fp:
1720 1723 fp.write(text)
1721 1724
1722 1725 class chunkbuffer(object):
1723 1726 """Allow arbitrary sized chunks of data to be efficiently read from an
1724 1727 iterator over chunks of arbitrary size."""
1725 1728
1726 1729 def __init__(self, in_iter):
1727 1730         """in_iter is the iterator that's iterating over the input
1728 1731         chunks."""
1729 1732 def splitbig(chunks):
1730 1733 for chunk in chunks:
1731 1734 if len(chunk) > 2**20:
1732 1735 pos = 0
1733 1736 while pos < len(chunk):
1734 1737 end = pos + 2 ** 18
1735 1738 yield chunk[pos:end]
1736 1739 pos = end
1737 1740 else:
1738 1741 yield chunk
1739 1742 self.iter = splitbig(in_iter)
1740 1743 self._queue = collections.deque()
1741 1744 self._chunkoffset = 0
1742 1745
1743 1746 def read(self, l=None):
1744 1747 """Read L bytes of data from the iterator of chunks of data.
1745 1748 Returns less than L bytes if the iterator runs dry.
1746 1749
1747 1750         If the size parameter is omitted, read everything."""
1748 1751 if l is None:
1749 1752 return ''.join(self.iter)
1750 1753
1751 1754 left = l
1752 1755 buf = []
1753 1756 queue = self._queue
1754 1757 while left > 0:
1755 1758 # refill the queue
1756 1759 if not queue:
1757 1760 target = 2**18
1758 1761 for chunk in self.iter:
1759 1762 queue.append(chunk)
1760 1763 target -= len(chunk)
1761 1764 if target <= 0:
1762 1765 break
1763 1766 if not queue:
1764 1767 break
1765 1768
1766 1769 # The easy way to do this would be to queue.popleft(), modify the
1767 1770 # chunk (if necessary), then queue.appendleft(). However, for cases
1768 1771 # where we read partial chunk content, this incurs 2 dequeue
1769 1772 # mutations and creates a new str for the remaining chunk in the
1770 1773 # queue. Our code below avoids this overhead.
1771 1774
1772 1775 chunk = queue[0]
1773 1776 chunkl = len(chunk)
1774 1777 offset = self._chunkoffset
1775 1778
1776 1779 # Use full chunk.
1777 1780 if offset == 0 and left >= chunkl:
1778 1781 left -= chunkl
1779 1782 queue.popleft()
1780 1783 buf.append(chunk)
1781 1784 # self._chunkoffset remains at 0.
1782 1785 continue
1783 1786
1784 1787 chunkremaining = chunkl - offset
1785 1788
1786 1789 # Use all of unconsumed part of chunk.
1787 1790 if left >= chunkremaining:
1788 1791 left -= chunkremaining
1789 1792 queue.popleft()
1790 1793                 # the offset == 0 case is handled by the block above, so
1791 1794                 # this won't merely copy via ``chunk[0:]``.
1792 1795 buf.append(chunk[offset:])
1793 1796 self._chunkoffset = 0
1794 1797
1795 1798 # Partial chunk needed.
1796 1799 else:
1797 1800 buf.append(chunk[offset:offset + left])
1798 1801 self._chunkoffset += left
1799 1802 left -= chunkremaining
1800 1803
1801 1804 return ''.join(buf)
1802 1805
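# Illustrative example (not part of the original module): fixed-size reads
# from arbitrarily sized input chunks.
def _demo_chunkbuffer():
    buf = chunkbuffer(iter(['abc', 'defgh', 'ij']))
    assert buf.read(4) == 'abcd'    # spans the first two chunks
    assert buf.read(6) == 'efghij'  # drains the remainder
    assert buf.read(1) == ''        # the iterator has run dry
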
1803 1806 def filechunkiter(f, size=131072, limit=None):
1804 1807     """Create a generator that produces the data in the file, size
1805 1808     (default 131072) bytes at a time, up to the optional limit (default is
1806 1809 to read all data). Chunks may be less than size bytes if the
1807 1810 chunk is the last chunk in the file, or the file is a socket or
1808 1811 some other type of file that sometimes reads less data than is
1809 1812 requested."""
1810 1813 assert size >= 0
1811 1814 assert limit is None or limit >= 0
1812 1815 while True:
1813 1816 if limit is None:
1814 1817 nbytes = size
1815 1818 else:
1816 1819 nbytes = min(limit, size)
1817 1820 s = nbytes and f.read(nbytes)
1818 1821 if not s:
1819 1822 break
1820 1823 if limit:
1821 1824 limit -= len(s)
1822 1825 yield s
1823 1826
1824 1827 def makedate(timestamp=None):
1825 1828 '''Return a unix timestamp (or the current time) as a (unixtime,
1826 1829     offset) tuple based on the local timezone.'''
1827 1830 if timestamp is None:
1828 1831 timestamp = time.time()
1829 1832 if timestamp < 0:
1830 1833 hint = _("check your clock")
1831 1834 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1832 1835 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1833 1836 datetime.datetime.fromtimestamp(timestamp))
1834 1837 tz = delta.days * 86400 + delta.seconds
1835 1838 return timestamp, tz
1836 1839
1837 1840 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1838 1841 """represent a (unixtime, offset) tuple as a localized time.
1839 1842 unixtime is seconds since the epoch, and offset is the time zone's
1840 1843 number of seconds away from UTC.
1841 1844
1842 1845 >>> datestr((0, 0))
1843 1846 'Thu Jan 01 00:00:00 1970 +0000'
1844 1847 >>> datestr((42, 0))
1845 1848 'Thu Jan 01 00:00:42 1970 +0000'
1846 1849 >>> datestr((-42, 0))
1847 1850 'Wed Dec 31 23:59:18 1969 +0000'
1848 1851 >>> datestr((0x7fffffff, 0))
1849 1852 'Tue Jan 19 03:14:07 2038 +0000'
1850 1853 >>> datestr((-0x80000000, 0))
1851 1854 'Fri Dec 13 20:45:52 1901 +0000'
1852 1855 """
1853 1856 t, tz = date or makedate()
1854 1857 if "%1" in format or "%2" in format or "%z" in format:
1855 1858 sign = (tz > 0) and "-" or "+"
1856 1859 minutes = abs(tz) // 60
1857 1860 q, r = divmod(minutes, 60)
1858 1861 format = format.replace("%z", "%1%2")
1859 1862 format = format.replace("%1", "%c%02d" % (sign, q))
1860 1863 format = format.replace("%2", "%02d" % r)
1861 1864 d = t - tz
1862 1865 if d > 0x7fffffff:
1863 1866 d = 0x7fffffff
1864 1867 elif d < -0x80000000:
1865 1868 d = -0x80000000
1866 1869 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1867 1870 # because they use the gmtime() system call which is buggy on Windows
1868 1871 # for negative values.
1869 1872 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1870 1873 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1871 1874 return s
1872 1875
1873 1876 def shortdate(date=None):
1874 1877     """turn (timestamp, tzoff) tuple into an ISO 8601 date."""
1875 1878 return datestr(date, format='%Y-%m-%d')
1876 1879
1877 1880 def parsetimezone(s):
1878 1881 """find a trailing timezone, if any, in string, and return a
1879 1882 (offset, remainder) pair"""
1880 1883
1881 1884 if s.endswith("GMT") or s.endswith("UTC"):
1882 1885 return 0, s[:-3].rstrip()
1883 1886
1884 1887 # Unix-style timezones [+-]hhmm
1885 1888 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1886 1889 sign = (s[-5] == "+") and 1 or -1
1887 1890 hours = int(s[-4:-2])
1888 1891 minutes = int(s[-2:])
1889 1892 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1890 1893
1891 1894 # ISO8601 trailing Z
1892 1895 if s.endswith("Z") and s[-2:-1].isdigit():
1893 1896 return 0, s[:-1]
1894 1897
1895 1898 # ISO8601-style [+-]hh:mm
1896 1899 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1897 1900 s[-5:-3].isdigit() and s[-2:].isdigit()):
1898 1901 sign = (s[-6] == "+") and 1 or -1
1899 1902 hours = int(s[-5:-3])
1900 1903 minutes = int(s[-2:])
1901 1904 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1902 1905
1903 1906 return None, s
1904 1907
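# Illustrative example (not part of the original module): the timezone
# forms accepted above and the (offset, remainder) pairs they produce.
def _demo_parsetimezone():
    assert parsetimezone('2017-05-01 12:00 +0530') == (-19800,
                                                       '2017-05-01 12:00')
    assert parsetimezone('12:00 GMT') == (0, '12:00')
    assert parsetimezone('2017-05-01T12:00:00Z') == (0, '2017-05-01T12:00:00')
    assert parsetimezone('noise') == (None, 'noise')
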
1905 1908 def strdate(string, format, defaults=None):
1906 1909 """parse a localized time string and return a (unixtime, offset) tuple.
1907 1910 if the string cannot be parsed, ValueError is raised."""
1908 1911 if defaults is None:
1909 1912 defaults = {}
1910 1913
1911 1914 # NOTE: unixtime = localunixtime + offset
1912 1915 offset, date = parsetimezone(string)
1913 1916
1914 1917 # add missing elements from defaults
1915 1918 usenow = False # default to using biased defaults
1916 1919 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1917 1920 found = [True for p in part if ("%"+p) in format]
1918 1921 if not found:
1919 1922 date += "@" + defaults[part][usenow]
1920 1923 format += "@%" + part[0]
1921 1924 else:
1922 1925             # We've found a specific time element; less specific time
1923 1926 # elements are relative to today
1924 1927 usenow = True
1925 1928
1926 1929 timetuple = time.strptime(date, format)
1927 1930 localunixtime = int(calendar.timegm(timetuple))
1928 1931 if offset is None:
1929 1932 # local timezone
1930 1933 unixtime = int(time.mktime(timetuple))
1931 1934 offset = unixtime - localunixtime
1932 1935 else:
1933 1936 unixtime = localunixtime + offset
1934 1937 return unixtime, offset
1935 1938
1936 1939 def parsedate(date, formats=None, bias=None):
1937 1940 """parse a localized date/time and return a (unixtime, offset) tuple.
1938 1941
1939 1942 The date may be a "unixtime offset" string or in one of the specified
1940 1943 formats. If the date already is a (unixtime, offset) tuple, it is returned.
1941 1944
1942 1945 >>> parsedate(' today ') == parsedate(\
1943 1946 datetime.date.today().strftime('%b %d'))
1944 1947 True
1945 1948 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
1946 1949 datetime.timedelta(days=1)\
1947 1950 ).strftime('%b %d'))
1948 1951 True
1949 1952 >>> now, tz = makedate()
1950 1953 >>> strnow, strtz = parsedate('now')
1951 1954 >>> (strnow - now) < 1
1952 1955 True
1953 1956 >>> tz == strtz
1954 1957 True
1955 1958 """
1956 1959 if bias is None:
1957 1960 bias = {}
1958 1961 if not date:
1959 1962 return 0, 0
1960 1963 if isinstance(date, tuple) and len(date) == 2:
1961 1964 return date
1962 1965 if not formats:
1963 1966 formats = defaultdateformats
1964 1967 date = date.strip()
1965 1968
1966 1969 if date == 'now' or date == _('now'):
1967 1970 return makedate()
1968 1971 if date == 'today' or date == _('today'):
1969 1972 date = datetime.date.today().strftime('%b %d')
1970 1973 elif date == 'yesterday' or date == _('yesterday'):
1971 1974 date = (datetime.date.today() -
1972 1975 datetime.timedelta(days=1)).strftime('%b %d')
1973 1976
1974 1977 try:
1975 1978 when, offset = map(int, date.split(' '))
1976 1979 except ValueError:
1977 1980 # fill out defaults
1978 1981 now = makedate()
1979 1982 defaults = {}
1980 1983 for part in ("d", "mb", "yY", "HI", "M", "S"):
1981 1984 # this piece is for rounding the specific end of unknowns
1982 1985 b = bias.get(part)
1983 1986 if b is None:
1984 1987 if part[0] in "HMS":
1985 1988 b = "00"
1986 1989 else:
1987 1990 b = "0"
1988 1991
1989 1992 # this piece is for matching the generic end to today's date
1990 1993 n = datestr(now, "%" + part[0])
1991 1994
1992 1995 defaults[part] = (b, n)
1993 1996
1994 1997 for format in formats:
1995 1998 try:
1996 1999 when, offset = strdate(date, format, defaults)
1997 2000 except (ValueError, OverflowError):
1998 2001 pass
1999 2002 else:
2000 2003 break
2001 2004 else:
2002 2005 raise Abort(_('invalid date: %r') % date)
2003 2006 # validate explicit (probably user-specified) date and
2004 2007 # time zone offset. values must fit in signed 32 bits for
2005 2008 # current 32-bit linux runtimes. timezones go from UTC-12
2006 2009 # to UTC+14
2007 2010 if when < -0x80000000 or when > 0x7fffffff:
2008 2011 raise Abort(_('date exceeds 32 bits: %d') % when)
2009 2012 if offset < -50400 or offset > 43200:
2010 2013 raise Abort(_('impossible time zone offset: %d') % offset)
2011 2014 return when, offset
2012 2015
2013 2016 def matchdate(date):
2014 2017 """Return a function that matches a given date match specifier
2015 2018
2016 2019 Formats include:
2017 2020
2018 2021 '{date}' match a given date to the accuracy provided
2019 2022
2020 2023 '<{date}' on or before a given date
2021 2024
2022 2025 '>{date}' on or after a given date
2023 2026
2024 2027 >>> p1 = parsedate("10:29:59")
2025 2028 >>> p2 = parsedate("10:30:00")
2026 2029 >>> p3 = parsedate("10:30:59")
2027 2030 >>> p4 = parsedate("10:31:00")
2028 2031 >>> p5 = parsedate("Sep 15 10:30:00 1999")
2029 2032 >>> f = matchdate("10:30")
2030 2033 >>> f(p1[0])
2031 2034 False
2032 2035 >>> f(p2[0])
2033 2036 True
2034 2037 >>> f(p3[0])
2035 2038 True
2036 2039 >>> f(p4[0])
2037 2040 False
2038 2041 >>> f(p5[0])
2039 2042 False
2040 2043 """
2041 2044
2042 2045 def lower(date):
2043 2046 d = {'mb': "1", 'd': "1"}
2044 2047 return parsedate(date, extendeddateformats, d)[0]
2045 2048
2046 2049 def upper(date):
2047 2050 d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
2048 2051 for days in ("31", "30", "29"):
2049 2052 try:
2050 2053 d["d"] = days
2051 2054 return parsedate(date, extendeddateformats, d)[0]
2052 2055 except Abort:
2053 2056 pass
2054 2057 d["d"] = "28"
2055 2058 return parsedate(date, extendeddateformats, d)[0]
2056 2059
2057 2060 date = date.strip()
2058 2061
2059 2062 if not date:
2060 2063 raise Abort(_("dates cannot consist entirely of whitespace"))
2061 2064 elif date[0] == "<":
2062 2065 if not date[1:]:
2063 2066 raise Abort(_("invalid day spec, use '<DATE'"))
2064 2067 when = upper(date[1:])
2065 2068 return lambda x: x <= when
2066 2069 elif date[0] == ">":
2067 2070 if not date[1:]:
2068 2071 raise Abort(_("invalid day spec, use '>DATE'"))
2069 2072 when = lower(date[1:])
2070 2073 return lambda x: x >= when
2071 2074 elif date[0] == "-":
2072 2075 try:
2073 2076 days = int(date[1:])
2074 2077 except ValueError:
2075 2078 raise Abort(_("invalid day spec: %s") % date[1:])
2076 2079 if days < 0:
2077 2080 raise Abort(_("%s must be nonnegative (see 'hg help dates')")
2078 2081 % date[1:])
2079 2082 when = makedate()[0] - days * 3600 * 24
2080 2083 return lambda x: x >= when
2081 2084 elif " to " in date:
2082 2085 a, b = date.split(" to ")
2083 2086 start, stop = lower(a), upper(b)
2084 2087 return lambda x: x >= start and x <= stop
2085 2088 else:
2086 2089 start, stop = lower(date), upper(date)
2087 2090 return lambda x: x >= start and x <= stop
2088 2091
2089 2092 def stringmatcher(pattern, casesensitive=True):
2090 2093 """
2091 2094 accepts a string, possibly starting with 're:' or 'literal:' prefix.
2092 2095 returns the matcher name, pattern, and matcher function.
2093 2096 missing or unknown prefixes are treated as literal matches.
2094 2097
2095 2098 helper for tests:
2096 2099 >>> def test(pattern, *tests):
2097 2100 ... kind, pattern, matcher = stringmatcher(pattern)
2098 2101 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2099 2102 >>> def itest(pattern, *tests):
2100 2103 ... kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
2101 2104 ... return (kind, pattern, [bool(matcher(t)) for t in tests])
2102 2105
2103 2106 exact matching (no prefix):
2104 2107 >>> test('abcdefg', 'abc', 'def', 'abcdefg')
2105 2108 ('literal', 'abcdefg', [False, False, True])
2106 2109
2107 2110 regex matching ('re:' prefix)
2108 2111 >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
2109 2112 ('re', 'a.+b', [False, False, True])
2110 2113
2111 2114 force exact matches ('literal:' prefix)
2112 2115 >>> test('literal:re:foobar', 'foobar', 're:foobar')
2113 2116 ('literal', 're:foobar', [False, True])
2114 2117
2115 2118 unknown prefixes are ignored and treated as literals
2116 2119 >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
2117 2120 ('literal', 'foo:bar', [False, False, True])
2118 2121
2119 2122 case insensitive regex matches
2120 2123 >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
2121 2124 ('re', 'A.+b', [False, False, True])
2122 2125
2123 2126 case insensitive literal matches
2124 2127 >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
2125 2128 ('literal', 'ABCDEFG', [False, False, True])
2126 2129 """
2127 2130 if pattern.startswith('re:'):
2128 2131 pattern = pattern[3:]
2129 2132 try:
2130 2133 flags = 0
2131 2134 if not casesensitive:
2132 2135 flags = remod.I
2133 2136 regex = remod.compile(pattern, flags)
2134 2137 except remod.error as e:
2135 2138 raise error.ParseError(_('invalid regular expression: %s')
2136 2139 % e)
2137 2140 return 're', pattern, regex.search
2138 2141 elif pattern.startswith('literal:'):
2139 2142 pattern = pattern[8:]
2140 2143
2141 2144 match = pattern.__eq__
2142 2145
2143 2146 if not casesensitive:
2144 2147 ipat = encoding.lower(pattern)
2145 2148 match = lambda s: ipat == encoding.lower(s)
2146 2149 return 'literal', pattern, match
2147 2150
2148 2151 def shortuser(user):
2149 2152 """Return a short representation of a user name or email address."""
2150 2153 f = user.find('@')
2151 2154 if f >= 0:
2152 2155 user = user[:f]
2153 2156 f = user.find('<')
2154 2157 if f >= 0:
2155 2158 user = user[f + 1:]
2156 2159 f = user.find(' ')
2157 2160 if f >= 0:
2158 2161 user = user[:f]
2159 2162 f = user.find('.')
2160 2163 if f >= 0:
2161 2164 user = user[:f]
2162 2165 return user
2163 2166
2164 2167 def emailuser(user):
2165 2168 """Return the user portion of an email address."""
2166 2169 f = user.find('@')
2167 2170 if f >= 0:
2168 2171 user = user[:f]
2169 2172 f = user.find('<')
2170 2173 if f >= 0:
2171 2174 user = user[f + 1:]
2172 2175 return user
2173 2176
2174 2177 def email(author):
2175 2178 '''get email of author.'''
2176 2179 r = author.find('>')
2177 2180 if r == -1:
2178 2181 r = None
2179 2182 return author[author.find('<') + 1:r]
2180 2183
2181 2184 def ellipsis(text, maxlength=400):
2182 2185 """Trim string to at most maxlength (default: 400) columns in display."""
2183 2186 return encoding.trim(text, maxlength, ellipsis='...')
2184 2187
2185 2188 def unitcountfn(*unittable):
2186 2189 '''return a function that renders a readable count of some quantity'''
2187 2190
2188 2191 def go(count):
2189 2192 for multiplier, divisor, format in unittable:
2190 2193 if abs(count) >= divisor * multiplier:
2191 2194 return format % (count / float(divisor))
2192 2195 return unittable[-1][2] % count
2193 2196
2194 2197 return go
2195 2198
2196 2199 def processlinerange(fromline, toline):
2197 2200 """Check that linerange <fromline>:<toline> makes sense and return a
2198 2201 0-based range.
2199 2202
2200 2203 >>> processlinerange(10, 20)
2201 2204 (9, 20)
2202 2205 >>> processlinerange(2, 1)
2203 2206 Traceback (most recent call last):
2204 2207 ...
2205 2208 ParseError: line range must be positive
2206 2209 >>> processlinerange(0, 5)
2207 2210 Traceback (most recent call last):
2208 2211 ...
2209 2212 ParseError: fromline must be strictly positive
2210 2213 """
2211 2214 if toline - fromline < 0:
2212 2215 raise error.ParseError(_("line range must be positive"))
2213 2216 if fromline < 1:
2214 2217 raise error.ParseError(_("fromline must be strictly positive"))
2215 2218 return fromline - 1, toline
2216 2219
2217 2220 bytecount = unitcountfn(
2218 2221 (100, 1 << 30, _('%.0f GB')),
2219 2222 (10, 1 << 30, _('%.1f GB')),
2220 2223 (1, 1 << 30, _('%.2f GB')),
2221 2224 (100, 1 << 20, _('%.0f MB')),
2222 2225 (10, 1 << 20, _('%.1f MB')),
2223 2226 (1, 1 << 20, _('%.2f MB')),
2224 2227 (100, 1 << 10, _('%.0f KB')),
2225 2228 (10, 1 << 10, _('%.1f KB')),
2226 2229 (1, 1 << 10, _('%.2f KB')),
2227 2230 (1, 1, _('%.0f bytes')),
2228 2231 )
2229 2232
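# Illustrative example (not part of the original module): bytecount picks
# the first row of the table above whose threshold the value reaches.
def _demo_bytecount():
    assert bytecount(0) == '0 bytes'
    assert bytecount(12345) == '12.1 KB'       # 10 KB <= value < 100 KB
    assert bytecount(1234567890) == '1.15 GB'  # 1 GB <= value < 10 GB
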
2230 2233 # Matches a single EOL which can either be a CRLF where repeated CR
2231 2234 # are removed or a LF. We do not care about old Macintosh files, so a
2232 2235 # stray CR is an error.
2233 2236 _eolre = remod.compile(br'\r*\n')
2234 2237
2235 2238 def tolf(s):
2236 2239 return _eolre.sub('\n', s)
2237 2240
2238 2241 def tocrlf(s):
2239 2242 return _eolre.sub('\r\n', s)
2240 2243
2241 2244 if pycompat.oslinesep == '\r\n':
2242 2245 tonativeeol = tocrlf
2243 2246 fromnativeeol = tolf
2244 2247 else:
2245 2248 tonativeeol = pycompat.identity
2246 2249 fromnativeeol = pycompat.identity
2247 2250
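# Illustrative example (not part of the original module): the EOL
# conversions performed by the regular expression above.
def _demo_eol():
    assert tolf('a\r\r\nb\n') == 'a\nb\n'    # runs of CR before LF collapse
    assert tocrlf('a\nb\n') == 'a\r\nb\r\n'
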
2248 2251 def escapestr(s):
2249 2252 # call underlying function of s.encode('string_escape') directly for
2250 2253 # Python 3 compatibility
2251 2254 return codecs.escape_encode(s)[0]
2252 2255
2253 2256 def unescapestr(s):
2254 2257 return codecs.escape_decode(s)[0]
2255 2258
2256 2259 def uirepr(s):
2257 2260 # Avoid double backslash in Windows path repr()
2258 2261 return repr(s).replace('\\\\', '\\')
2259 2262
2260 2263 # delay import of textwrap
2261 2264 def MBTextWrapper(**kwargs):
2262 2265 class tw(textwrap.TextWrapper):
2263 2266 """
2264 2267 Extend TextWrapper for width-awareness.
2265 2268
2266 2269         Neither the number of 'bytes' in any encoding nor the number
2267 2270         of 'characters' is appropriate for computing terminal columns.
2268 2271 
2269 2272         The original TextWrapper implementation uses the built-in 'len()'
2270 2273         directly, so overriding is needed to use the width of each character.
2271 2274 
2272 2275         In addition, characters classified as 'ambiguous' width are
2273 2276         treated as wide in East Asian locales, but as narrow elsewhere.
2274 2277 
2275 2278         This requires a user decision to determine the width of such characters.
2276 2279 """
2277 2280 def _cutdown(self, ucstr, space_left):
2278 2281 l = 0
2279 2282 colwidth = encoding.ucolwidth
2280 2283 for i in xrange(len(ucstr)):
2281 2284 l += colwidth(ucstr[i])
2282 2285 if space_left < l:
2283 2286 return (ucstr[:i], ucstr[i:])
2284 2287 return ucstr, ''
2285 2288
2286 2289 # overriding of base class
2287 2290 def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
2288 2291 space_left = max(width - cur_len, 1)
2289 2292
2290 2293 if self.break_long_words:
2291 2294 cut, res = self._cutdown(reversed_chunks[-1], space_left)
2292 2295 cur_line.append(cut)
2293 2296 reversed_chunks[-1] = res
2294 2297 elif not cur_line:
2295 2298 cur_line.append(reversed_chunks.pop())
2296 2299
2297 2300         # this overriding code is copied from TextWrapper of Python 2.6
2298 2301         # to calculate columns of a string by 'encoding.ucolwidth()'
2299 2302 def _wrap_chunks(self, chunks):
2300 2303 colwidth = encoding.ucolwidth
2301 2304
2302 2305 lines = []
2303 2306 if self.width <= 0:
2304 2307 raise ValueError("invalid width %r (must be > 0)" % self.width)
2305 2308
2306 2309 # Arrange in reverse order so items can be efficiently popped
2307 2310             # from a stack of chunks.
2308 2311 chunks.reverse()
2309 2312
2310 2313 while chunks:
2311 2314
2312 2315 # Start the list of chunks that will make up the current line.
2313 2316 # cur_len is just the length of all the chunks in cur_line.
2314 2317 cur_line = []
2315 2318 cur_len = 0
2316 2319
2317 2320 # Figure out which static string will prefix this line.
2318 2321 if lines:
2319 2322 indent = self.subsequent_indent
2320 2323 else:
2321 2324 indent = self.initial_indent
2322 2325
2323 2326 # Maximum width for this line.
2324 2327 width = self.width - len(indent)
2325 2328
2326 2329 # First chunk on line is whitespace -- drop it, unless this
2327 2330 # is the very beginning of the text (i.e. no lines started yet).
2328 2331 if self.drop_whitespace and chunks[-1].strip() == '' and lines:
2329 2332 del chunks[-1]
2330 2333
2331 2334 while chunks:
2332 2335 l = colwidth(chunks[-1])
2333 2336
2334 2337 # Can at least squeeze this chunk onto the current line.
2335 2338 if cur_len + l <= width:
2336 2339 cur_line.append(chunks.pop())
2337 2340 cur_len += l
2338 2341
2339 2342 # Nope, this line is full.
2340 2343 else:
2341 2344 break
2342 2345
2343 2346 # The current line is full, and the next chunk is too big to
2344 2347 # fit on *any* line (not just this one).
2345 2348 if chunks and colwidth(chunks[-1]) > width:
2346 2349 self._handle_long_word(chunks, cur_line, cur_len, width)
2347 2350
2348 2351 # If the last chunk on this line is all whitespace, drop it.
2349 2352 if (self.drop_whitespace and
2350 2353 cur_line and cur_line[-1].strip() == ''):
2351 2354 del cur_line[-1]
2352 2355
2353 2356 # Convert current line back to a string and store it in list
2354 2357 # of all lines (return value).
2355 2358 if cur_line:
2356 2359 lines.append(indent + ''.join(cur_line))
2357 2360
2358 2361 return lines
2359 2362
2360 2363 global MBTextWrapper
2361 2364 MBTextWrapper = tw
2362 2365 return tw(**kwargs)
2363 2366
2364 2367 def wrap(line, width, initindent='', hangindent=''):
2365 2368 maxindent = max(len(hangindent), len(initindent))
2366 2369 if width <= maxindent:
2367 2370 # adjust for weird terminal size
2368 2371 width = max(78, maxindent + 1)
2369 2372 line = line.decode(pycompat.sysstr(encoding.encoding),
2370 2373 pycompat.sysstr(encoding.encodingmode))
2371 2374 initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
2372 2375 pycompat.sysstr(encoding.encodingmode))
2373 2376 hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
2374 2377 pycompat.sysstr(encoding.encodingmode))
2375 2378 wrapper = MBTextWrapper(width=width,
2376 2379 initial_indent=initindent,
2377 2380 subsequent_indent=hangindent)
2378 2381 return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2379 2382
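# Illustrative example (not part of the original module): hanging-indent
# wrapping with the width-aware wrapper above.
def _demo_wrap():
    # first line flush left, continuation lines indented two columns
    assert wrap('a b c d e f', width=5, hangindent='  ') == 'a b c\n  d e\n  f'
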
2380 2383 if (pyplatform.python_implementation() == 'CPython' and
2381 2384 sys.version_info < (3, 0)):
2382 2385 # There is an issue in CPython that some IO methods do not handle EINTR
2383 2386 # correctly. The following table shows what CPython version (and functions)
2384 2387 # are affected (buggy: has the EINTR bug, okay: otherwise):
2385 2388 #
2386 2389 # | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
2387 2390 # --------------------------------------------------
2388 2391 # fp.__iter__ | buggy | buggy | okay
2389 2392 # fp.read* | buggy | okay [1] | okay
2390 2393 #
2391 2394 # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
2392 2395 #
2393 2396     # Here we work around the EINTR issue for fileobj.__iter__. Other methods
2394 2397 # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
2395 2398 #
2396 2399     # Although we can work around the EINTR issue for fp.__iter__, it is slower:
2397 2400 # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
2398 2401 # CPython 2, because CPython 2 maintains an internal readahead buffer for
2399 2402 # fp.__iter__ but not other fp.read* methods.
2400 2403 #
2401 2404 # On modern systems like Linux, the "read" syscall cannot be interrupted
2402 2405 # when reading "fast" files like on-disk files. So the EINTR issue only
2403 2406 # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
2404 2407 # files approximately as "fast" files and use the fast (unsafe) code path,
2405 2408 # to minimize the performance impact.
2406 2409 if sys.version_info >= (2, 7, 4):
2407 2410 # fp.readline deals with EINTR correctly, use it as a workaround.
2408 2411 def _safeiterfile(fp):
2409 2412 return iter(fp.readline, '')
2410 2413 else:
2411 2414 # fp.read* are broken too, manually deal with EINTR in a stupid way.
2412 2415 # note: this may block longer than necessary because of bufsize.
2413 2416 def _safeiterfile(fp, bufsize=4096):
2414 2417 fd = fp.fileno()
2415 2418 line = ''
2416 2419 while True:
2417 2420 try:
2418 2421 buf = os.read(fd, bufsize)
2419 2422 except OSError as ex:
2420 2423 # os.read only raises EINTR before any data is read
2421 2424 if ex.errno == errno.EINTR:
2422 2425 continue
2423 2426 else:
2424 2427 raise
2425 2428 line += buf
2426 2429 if '\n' in buf:
2427 2430 splitted = line.splitlines(True)
2428 2431 line = ''
2429 2432 for l in splitted:
2430 2433 if l[-1] == '\n':
2431 2434 yield l
2432 2435 else:
2433 2436 line = l
2434 2437 if not buf:
2435 2438 break
2436 2439 if line:
2437 2440 yield line
2438 2441
2439 2442 def iterfile(fp):
2440 2443 fastpath = True
2441 2444 if type(fp) is file:
2442 2445 fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
2443 2446 if fastpath:
2444 2447 return fp
2445 2448 else:
2446 2449 return _safeiterfile(fp)
2447 2450 else:
2448 2451 # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
2449 2452 def iterfile(fp):
2450 2453 return fp
2451 2454
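# Illustrative example (not part of the original module): iterating any
# file object line by line; `process` is a hypothetical callback.
def _demo_iterfile(fp, process):
    # iterfile transparently picks the EINTR-safe path only when `fp`
    # is something like a pipe, socket or tty on an affected CPython
    for line in iterfile(fp):
        process(line)
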
2452 2455 def iterlines(iterator):
2453 2456 for chunk in iterator:
2454 2457 for line in chunk.splitlines():
2455 2458 yield line
2456 2459
2457 2460 def expandpath(path):
2458 2461 return os.path.expanduser(os.path.expandvars(path))
2459 2462
2460 2463 def hgcmd():
2461 2464 """Return the command used to execute current hg
2462 2465
2463 2466 This is different from hgexecutable() because on Windows we want
2464 2467 to avoid things opening new shell windows like batch files, so we
2465 2468 get either the python call or current executable.
2466 2469 """
2467 2470 if mainfrozen():
2468 2471 if getattr(sys, 'frozen', None) == 'macosx_app':
2469 2472 # Env variable set by py2app
2470 2473 return [encoding.environ['EXECUTABLEPATH']]
2471 2474 else:
2472 2475 return [pycompat.sysexecutable]
2473 2476 return gethgcmd()
2474 2477
2475 2478 def rundetached(args, condfn):
2476 2479 """Execute the argument list in a detached process.
2477 2480
2478 2481 condfn is a callable which is called repeatedly and should return
2479 2482 True once the child process is known to have started successfully.
2480 2483 At this point, the child process PID is returned. If the child
2481 2484 process fails to start or finishes before condfn() evaluates to
2482 2485 True, return -1.
2483 2486 """
2484 2487 # Windows case is easier because the child process is either
2485 2488 # successfully starting and validating the condition or exiting
2486 2489 # on failure. We just poll on its PID. On Unix, if the child
2487 2490 # process fails to start, it will be left in a zombie state until
2488 2491     # the parent waits on it, which we cannot do since we expect a long
2489 2492 # running process on success. Instead we listen for SIGCHLD telling
2490 2493 # us our child process terminated.
2491 2494 terminated = set()
2492 2495 def handler(signum, frame):
2493 2496 terminated.add(os.wait())
2494 2497 prevhandler = None
2495 2498 SIGCHLD = getattr(signal, 'SIGCHLD', None)
2496 2499 if SIGCHLD is not None:
2497 2500 prevhandler = signal.signal(SIGCHLD, handler)
2498 2501 try:
2499 2502 pid = spawndetached(args)
2500 2503 while not condfn():
2501 2504 if ((pid in terminated or not testpid(pid))
2502 2505 and not condfn()):
2503 2506 return -1
2504 2507 time.sleep(0.1)
2505 2508 return pid
2506 2509 finally:
2507 2510 if prevhandler is not None:
2508 2511 signal.signal(signal.SIGCHLD, prevhandler)
2509 2512
2510 2513 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2511 2514 """Return the result of interpolating items in the mapping into string s.
2512 2515
2513 2516 prefix is a single character string, or a two character string with
2514 2517 a backslash as the first character if the prefix needs to be escaped in
2515 2518 a regular expression.
2516 2519
2517 2520 fn is an optional function that will be applied to the replacement text
2518 2521 just before replacement.
2519 2522
2520 2523 escape_prefix is an optional flag that allows using doubled prefix for
2521 2524 its escaping.
2522 2525 """
2523 2526 fn = fn or (lambda s: s)
2524 2527 patterns = '|'.join(mapping.keys())
2525 2528 if escape_prefix:
2526 2529 patterns += '|' + prefix
2527 2530 if len(prefix) > 1:
2528 2531 prefix_char = prefix[1:]
2529 2532 else:
2530 2533 prefix_char = prefix
2531 2534 mapping[prefix_char] = prefix_char
2532 2535 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2533 2536 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2534 2537
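# Illustrative example (not part of the original module): '%'-style
# substitution where a doubled prefix escapes itself.
def _demo_interpolate():
    mapping = {'user': 'alice', 'repo': 'hg'}
    s = interpolate(r'\%', mapping, '%user pushed to %repo (100%%)',
                    escape_prefix=True)
    assert s == 'alice pushed to hg (100%)'
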
2535 2538 def getport(port):
2536 2539 """Return the port for a given network service.
2537 2540
2538 2541 If port is an integer, it's returned as is. If it's a string, it's
2539 2542 looked up using socket.getservbyname(). If there's no matching
2540 2543 service, error.Abort is raised.
2541 2544 """
2542 2545 try:
2543 2546 return int(port)
2544 2547 except ValueError:
2545 2548 pass
2546 2549
2547 2550 try:
2548 2551 return socket.getservbyname(port)
2549 2552 except socket.error:
2550 2553 raise Abort(_("no port number associated with service '%s'") % port)
2551 2554
2552 2555 _booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
2553 2556 '0': False, 'no': False, 'false': False, 'off': False,
2554 2557 'never': False}
2555 2558
2556 2559 def parsebool(s):
2557 2560 """Parse s into a boolean.
2558 2561
2559 2562 If s is not a valid boolean, returns None.
2560 2563 """
2561 2564 return _booleans.get(s.lower(), None)
2562 2565
2563 2566 _hextochr = dict((a + b, chr(int(a + b, 16)))
2564 2567 for a in string.hexdigits for b in string.hexdigits)
2565 2568
2566 2569 class url(object):
2567 2570 r"""Reliable URL parser.
2568 2571
2569 2572 This parses URLs and provides attributes for the following
2570 2573 components:
2571 2574
2572 2575 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2573 2576
2574 2577 Missing components are set to None. The only exception is
2575 2578 fragment, which is set to '' if present but empty.
2576 2579
2577 2580 If parsefragment is False, fragment is included in query. If
2578 2581 parsequery is False, query is included in path. If both are
2579 2582 False, both fragment and query are included in path.
2580 2583
2581 2584 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2582 2585
2583 2586 Note that for backward compatibility reasons, bundle URLs do not
2584 2587 take host names. That means 'bundle://../' has a path of '../'.
2585 2588
2586 2589 Examples:
2587 2590
2588 2591 >>> url('http://www.ietf.org/rfc/rfc2396.txt')
2589 2592 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2590 2593 >>> url('ssh://[::1]:2200//home/joe/repo')
2591 2594 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2592 2595 >>> url('file:///home/joe/repo')
2593 2596 <url scheme: 'file', path: '/home/joe/repo'>
2594 2597 >>> url('file:///c:/temp/foo/')
2595 2598 <url scheme: 'file', path: 'c:/temp/foo/'>
2596 2599 >>> url('bundle:foo')
2597 2600 <url scheme: 'bundle', path: 'foo'>
2598 2601 >>> url('bundle://../foo')
2599 2602 <url scheme: 'bundle', path: '../foo'>
2600 2603 >>> url(r'c:\foo\bar')
2601 2604 <url path: 'c:\\foo\\bar'>
2602 2605 >>> url(r'\\blah\blah\blah')
2603 2606 <url path: '\\\\blah\\blah\\blah'>
2604 2607 >>> url(r'\\blah\blah\blah#baz')
2605 2608 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2606 2609 >>> url(r'file:///C:\users\me')
2607 2610 <url scheme: 'file', path: 'C:\\users\\me'>
2608 2611
2609 2612 Authentication credentials:
2610 2613
2611 2614 >>> url('ssh://joe:xyz@x/repo')
2612 2615 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2613 2616 >>> url('ssh://joe@x/repo')
2614 2617 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2615 2618
2616 2619 Query strings and fragments:
2617 2620
2618 2621 >>> url('http://host/a?b#c')
2619 2622 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2620 2623 >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
2621 2624 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2622 2625
2623 2626 Empty path:
2624 2627
2625 2628 >>> url('')
2626 2629 <url path: ''>
2627 2630 >>> url('#a')
2628 2631 <url path: '', fragment: 'a'>
2629 2632 >>> url('http://host/')
2630 2633 <url scheme: 'http', host: 'host', path: ''>
2631 2634 >>> url('http://host/#a')
2632 2635 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2633 2636
2634 2637 Only scheme:
2635 2638
2636 2639 >>> url('http:')
2637 2640 <url scheme: 'http'>
2638 2641 """
2639 2642
2640 2643 _safechars = "!~*'()+"
2641 2644 _safepchars = "/!~*'()+:\\"
2642 2645 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2643 2646
2644 2647 def __init__(self, path, parsequery=True, parsefragment=True):
2645 2648 # We slowly chomp away at path until we have only the path left
2646 2649 self.scheme = self.user = self.passwd = self.host = None
2647 2650 self.port = self.path = self.query = self.fragment = None
2648 2651 self._localpath = True
2649 2652 self._hostport = ''
2650 2653 self._origpath = path
2651 2654
2652 2655 if parsefragment and '#' in path:
2653 2656 path, self.fragment = path.split('#', 1)
2654 2657
2655 2658 # special case for Windows drive letters and UNC paths
2656 2659 if hasdriveletter(path) or path.startswith('\\\\'):
2657 2660 self.path = path
2658 2661 return
2659 2662
2660 2663 # For compatibility reasons, we can't handle bundle paths as
2661 2664         # normal URLs
2662 2665 if path.startswith('bundle:'):
2663 2666 self.scheme = 'bundle'
2664 2667 path = path[7:]
2665 2668 if path.startswith('//'):
2666 2669 path = path[2:]
2667 2670 self.path = path
2668 2671 return
2669 2672
2670 2673 if self._matchscheme(path):
2671 2674 parts = path.split(':', 1)
2672 2675 if parts[0]:
2673 2676 self.scheme, path = parts
2674 2677 self._localpath = False
2675 2678
2676 2679 if not path:
2677 2680 path = None
2678 2681 if self._localpath:
2679 2682 self.path = ''
2680 2683 return
2681 2684 else:
2682 2685 if self._localpath:
2683 2686 self.path = path
2684 2687 return
2685 2688
2686 2689 if parsequery and '?' in path:
2687 2690 path, self.query = path.split('?', 1)
2688 2691 if not path:
2689 2692 path = None
2690 2693 if not self.query:
2691 2694 self.query = None
2692 2695
2693 2696 # // is required to specify a host/authority
2694 2697 if path and path.startswith('//'):
2695 2698 parts = path[2:].split('/', 1)
2696 2699 if len(parts) > 1:
2697 2700 self.host, path = parts
2698 2701 else:
2699 2702 self.host = parts[0]
2700 2703 path = None
2701 2704 if not self.host:
2702 2705 self.host = None
2703 2706 # path of file:///d is /d
2704 2707 # path of file:///d:/ is d:/, not /d:/
2705 2708 if path and not hasdriveletter(path):
2706 2709 path = '/' + path
2707 2710
2708 2711 if self.host and '@' in self.host:
2709 2712 self.user, self.host = self.host.rsplit('@', 1)
2710 2713 if ':' in self.user:
2711 2714 self.user, self.passwd = self.user.split(':', 1)
2712 2715 if not self.host:
2713 2716 self.host = None
2714 2717
2715 2718 # Don't split on colons in IPv6 addresses without ports
2716 2719 if (self.host and ':' in self.host and
2717 2720 not (self.host.startswith('[') and self.host.endswith(']'))):
2718 2721 self._hostport = self.host
2719 2722 self.host, self.port = self.host.rsplit(':', 1)
2720 2723 if not self.host:
2721 2724 self.host = None
2722 2725
2723 2726 if (self.host and self.scheme == 'file' and
2724 2727 self.host not in ('localhost', '127.0.0.1', '[::1]')):
2725 2728 raise Abort(_('file:// URLs can only refer to localhost'))
2726 2729
2727 2730 self.path = path
2728 2731
2729 2732 # leave the query string escaped
2730 2733 for a in ('user', 'passwd', 'host', 'port',
2731 2734 'path', 'fragment'):
2732 2735 v = getattr(self, a)
2733 2736 if v is not None:
2734 2737 setattr(self, a, urlreq.unquote(v))
2735 2738
2736 2739 def __repr__(self):
2737 2740 attrs = []
2738 2741 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
2739 2742 'query', 'fragment'):
2740 2743 v = getattr(self, a)
2741 2744 if v is not None:
2742 2745 attrs.append('%s: %r' % (a, v))
2743 2746 return '<url %s>' % ', '.join(attrs)
2744 2747
2745 2748 def __str__(self):
2746 2749 r"""Join the URL's components back into a URL string.
2747 2750
2748 2751 Examples:
2749 2752
2750 2753 >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
2751 2754 'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
2752 2755 >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
2753 2756 'http://user:pw@host:80/?foo=bar&baz=42'
2754 2757 >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
2755 2758 'http://user:pw@host:80/?foo=bar%3dbaz'
2756 2759 >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
2757 2760 'ssh://user:pw@[::1]:2200//home/joe#'
2758 2761 >>> str(url('http://localhost:80//'))
2759 2762 'http://localhost:80//'
2760 2763 >>> str(url('http://localhost:80/'))
2761 2764 'http://localhost:80/'
2762 2765 >>> str(url('http://localhost:80'))
2763 2766 'http://localhost:80/'
2764 2767 >>> str(url('bundle:foo'))
2765 2768 'bundle:foo'
2766 2769 >>> str(url('bundle://../foo'))
2767 2770 'bundle:../foo'
2768 2771 >>> str(url('path'))
2769 2772 'path'
2770 2773 >>> str(url('file:///tmp/foo/bar'))
2771 2774 'file:///tmp/foo/bar'
2772 2775 >>> str(url('file:///c:/tmp/foo/bar'))
2773 2776 'file:///c:/tmp/foo/bar'
2774 2777 >>> print url(r'bundle:foo\bar')
2775 2778 bundle:foo\bar
2776 2779 >>> print url(r'file:///D:\data\hg')
2777 2780 file:///D:\data\hg
2778 2781 """
2779 2782 return encoding.strfromlocal(self.__bytes__())
2780 2783
2781 2784 def __bytes__(self):
2782 2785 if self._localpath:
2783 2786 s = self.path
2784 2787 if self.scheme == 'bundle':
2785 2788 s = 'bundle:' + s
2786 2789 if self.fragment:
2787 2790 s += '#' + self.fragment
2788 2791 return s
2789 2792
2790 2793 s = self.scheme + ':'
2791 2794 if self.user or self.passwd or self.host:
2792 2795 s += '//'
2793 2796 elif self.scheme and (not self.path or self.path.startswith('/')
2794 2797 or hasdriveletter(self.path)):
2795 2798 s += '//'
2796 2799 if hasdriveletter(self.path):
2797 2800 s += '/'
2798 2801 if self.user:
2799 2802 s += urlreq.quote(self.user, safe=self._safechars)
2800 2803 if self.passwd:
2801 2804 s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
2802 2805 if self.user or self.passwd:
2803 2806 s += '@'
2804 2807 if self.host:
2805 2808 if not (self.host.startswith('[') and self.host.endswith(']')):
2806 2809 s += urlreq.quote(self.host)
2807 2810 else:
2808 2811 s += self.host
2809 2812 if self.port:
2810 2813 s += ':' + urlreq.quote(self.port)
2811 2814 if self.host:
2812 2815 s += '/'
2813 2816 if self.path:
2814 2817 # TODO: similar to the query string, we should not unescape the
2815 2818 # path when we store it, the path might contain '%2f' = '/',
2816 2819 # which we should *not* escape.
2817 2820 s += urlreq.quote(self.path, safe=self._safepchars)
2818 2821 if self.query:
2819 2822 # we store the query in escaped form.
2820 2823 s += '?' + self.query
2821 2824 if self.fragment is not None:
2822 2825 s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
2823 2826 return s
2824 2827
2825 2828 def authinfo(self):
2826 2829 user, passwd = self.user, self.passwd
2827 2830 try:
2828 2831 self.user, self.passwd = None, None
2829 2832 s = bytes(self)
2830 2833 finally:
2831 2834 self.user, self.passwd = user, passwd
2832 2835 if not self.user:
2833 2836 return (s, None)
2834 2837 # authinfo[1] is passed to urllib2 password manager, and its
2835 2838 # URIs must not contain credentials. The host is passed in the
2836 2839 # URIs list because Python < 2.4.3 uses only that to search for
2837 2840 # a password.
2838 2841 return (s, (None, (s, self.host),
2839 2842 self.user, self.passwd or ''))
2840 2843
2841 2844 def isabs(self):
2842 2845 if self.scheme and self.scheme != 'file':
2843 2846 return True # remote URL
2844 2847 if hasdriveletter(self.path):
2845 2848 return True # absolute for our purposes - can't be joined()
2846 2849 if self.path.startswith(r'\\'):
2847 2850 return True # Windows UNC path
2848 2851 if self.path.startswith('/'):
2849 2852 return True # POSIX-style
2850 2853 return False
2851 2854
2852 2855 def localpath(self):
2853 2856 if self.scheme == 'file' or self.scheme == 'bundle':
2854 2857 path = self.path or '/'
2855 2858 # For Windows, we need to promote hosts containing drive
2856 2859 # letters to paths with drive letters.
2857 2860 if hasdriveletter(self._hostport):
2858 2861 path = self._hostport + '/' + self.path
2859 2862 elif (self.host is not None and self.path
2860 2863 and not hasdriveletter(path)):
2861 2864 path = '/' + path
2862 2865 return path
2863 2866 return self._origpath
2864 2867
2865 2868 def islocal(self):
2866 2869 '''whether localpath will return something that posixfile can open'''
2867 2870 return (not self.scheme or self.scheme == 'file'
2868 2871 or self.scheme == 'bundle')
2869 2872
2870 2873 def hasscheme(path):
2871 2874 return bool(url(path).scheme)
2872 2875
2873 2876 def hasdriveletter(path):
2874 2877 return path and path[1:2] == ':' and path[0:1].isalpha()
2875 2878
2876 2879 def urllocalpath(path):
2877 2880 return url(path, parsequery=False, parsefragment=False).localpath()
2878 2881
2879 2882 def hidepassword(u):
2880 2883 '''hide user credential in a url string'''
2881 2884 u = url(u)
2882 2885 if u.passwd:
2883 2886 u.passwd = '***'
2884 2887 return bytes(u)
2885 2888
2886 2889 def removeauth(u):
2887 2890 '''remove all authentication information from a url string'''
2888 2891 u = url(u)
2889 2892 u.user = u.passwd = None
2890 2893 return str(u)
2891 2894
2892 2895 timecount = unitcountfn(
2893 2896 (1, 1e3, _('%.0f s')),
2894 2897 (100, 1, _('%.1f s')),
2895 2898 (10, 1, _('%.2f s')),
2896 2899 (1, 1, _('%.3f s')),
2897 2900 (100, 0.001, _('%.1f ms')),
2898 2901 (10, 0.001, _('%.2f ms')),
2899 2902 (1, 0.001, _('%.3f ms')),
2900 2903 (100, 0.000001, _('%.1f us')),
2901 2904 (10, 0.000001, _('%.2f us')),
2902 2905 (1, 0.000001, _('%.3f us')),
2903 2906 (100, 0.000000001, _('%.1f ns')),
2904 2907 (10, 0.000000001, _('%.2f ns')),
2905 2908 (1, 0.000000001, _('%.3f ns')),
2906 2909 )
2907 2910
2908 2911 _timenesting = [0]
2909 2912
2910 2913 def timed(func):
2911 2914 '''Report the execution time of a function call to stderr.
2912 2915
2913 2916 During development, use as a decorator when you need to measure
2914 2917 the cost of a function, e.g. as follows:
2915 2918
2916 2919 @util.timed
2917 2920 def foo(a, b, c):
2918 2921 pass
2919 2922 '''
2920 2923
2921 2924 def wrapper(*args, **kwargs):
2922 2925 start = timer()
2923 2926 indent = 2
2924 2927 _timenesting[0] += indent
2925 2928 try:
2926 2929 return func(*args, **kwargs)
2927 2930 finally:
2928 2931 elapsed = timer() - start
2929 2932 _timenesting[0] -= indent
2930 2933 stderr.write('%s%s: %s\n' %
2931 2934 (' ' * _timenesting[0], func.__name__,
2932 2935 timecount(elapsed)))
2933 2936 return wrapper
2934 2937
2935 2938 _sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
2936 2939 ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))
2937 2940
2938 2941 def sizetoint(s):
2939 2942 '''Convert a space specifier to a byte count.
2940 2943
2941 2944 >>> sizetoint('30')
2942 2945 30
2943 2946 >>> sizetoint('2.2kb')
2944 2947 2252
2945 2948 >>> sizetoint('6M')
2946 2949 6291456
2947 2950 '''
2948 2951 t = s.strip().lower()
2949 2952 try:
2950 2953 for k, u in _sizeunits:
2951 2954 if t.endswith(k):
2952 2955 return int(float(t[:-len(k)]) * u)
2953 2956 return int(t)
2954 2957 except ValueError:
2955 2958 raise error.ParseError(_("couldn't parse size: %s") % s)
2956 2959
2957 2960 class hooks(object):
2958 2961 '''A collection of hook functions that can be used to extend a
2959 2962 function's behavior. Hooks are called in lexicographic order,
2960 2963 based on the names of their sources.'''
2961 2964
2962 2965 def __init__(self):
2963 2966 self._hooks = []
2964 2967
2965 2968 def add(self, source, hook):
2966 2969 self._hooks.append((source, hook))
2967 2970
2968 2971 def __call__(self, *args):
2969 2972 self._hooks.sort(key=lambda x: x[0])
2970 2973 results = []
2971 2974 for source, hook in self._hooks:
2972 2975 results.append(hook(*args))
2973 2976 return results
2974 2977
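# Illustrative example (not part of the original module): hooks run in
# lexicographic order of their source names, not in insertion order.
def _demo_hooks():
    calls = hooks()
    order = []
    calls.add('zzz-ext', lambda: order.append('second'))
    calls.add('aaa-ext', lambda: order.append('first'))
    calls()
    assert order == ['first', 'second']
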
2975 2978 def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
2976 2979 '''Yields lines for a nicely formatted stacktrace.
2977 2980     Skips the 'skip' last entries, then returns the last 'depth' entries.
2978 2981 Each file+linenumber is formatted according to fileline.
2979 2982 Each line is formatted according to line.
2980 2983 If line is None, it yields:
2981 2984 length of longest filepath+line number,
2982 2985 filepath+linenumber,
2983 2986 function
2984 2987
2985 2988     Not to be used in production code, but very convenient while developing.
2986 2989 '''
2987 2990 entries = [(fileline % (fn, ln), func)
2988 2991 for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
2989 2992 ][-depth:]
2990 2993 if entries:
2991 2994 fnmax = max(len(entry[0]) for entry in entries)
2992 2995 for fnln, func in entries:
2993 2996 if line is None:
2994 2997 yield (fnmax, fnln, func)
2995 2998 else:
2996 2999 yield line % (fnmax, fnln, func)
2997 3000
2998 3001 def debugstacktrace(msg='stacktrace', skip=0,
2999 3002 f=stderr, otherf=stdout, depth=0):
3000 3003 '''Writes a message to f (stderr) with a nicely formatted stacktrace.
3001 3004     Skips the 'skip' entries closest to the call, then shows 'depth' entries.
3002 3005 By default it will flush stdout first.
3003 3006 It can be used everywhere and intentionally does not require an ui object.
3004 3007     Not to be used in production code, but very convenient while developing.
3005 3008 '''
3006 3009 if otherf:
3007 3010 otherf.flush()
3008 3011 f.write('%s at:\n' % msg.rstrip())
3009 3012 for line in getstackframes(skip + 1, depth=depth):
3010 3013 f.write(line)
3011 3014 f.flush()

class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)
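
# Illustrative sketch (not part of the original module): finddirs yields
# every ancestor directory of a slash-separated path, deepest first, and
# dirs() uses it to keep a reference count per directory::
#
#   list(finddirs('a/b/c'))   # -> ['a/b', 'a']
#   d = dirs(['a/b/c', 'a/d'])
#   'a' in d                  # -> True
#   d.delpath('a/d')
#   'a' in d                  # -> True ('a/b/c' still references 'a')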

class ctxmanager(object):
    '''A context manager for use in 'with' blocks to allow multiple
    contexts to be entered at once. This is both safer and more
    flexible than contextlib.nested.

    Once Mercurial supports Python 2.7+, this will become mostly
    unnecessary.
    '''

    def __init__(self, *args):
        '''Accepts a list of no-argument functions that return context
        managers. These will be invoked at ``enter()`` time.'''
        self._pending = args
        self._atexit = []

    def __enter__(self):
        return self

    def enter(self):
        '''Create and enter context managers in the order in which they were
        passed to the constructor.'''
        values = []
        for func in self._pending:
            obj = func()
            values.append(obj.__enter__())
            self._atexit.append(obj.__exit__)
        del self._pending
        return values

    def atexit(self, func, *args, **kwargs):
        '''Add a function to call when this context manager exits. The
        ordering of multiple atexit calls is unspecified, save that
        they will happen before any __exit__ functions.'''
        def wrapper(exc_type, exc_val, exc_tb):
            func(*args, **kwargs)
        self._atexit.append(wrapper)
        return func

    def __exit__(self, exc_type, exc_val, exc_tb):
        '''Context managers are exited in the reverse order from which
        they were created.'''
        received = exc_type is not None
        suppressed = False
        pending = None
        self._atexit.reverse()
        for exitfunc in self._atexit:
            try:
                if exitfunc(exc_type, exc_val, exc_tb):
                    suppressed = True
                    exc_type = None
                    exc_val = None
                    exc_tb = None
            except BaseException:
                exc_type, exc_val, exc_tb = pending = sys.exc_info()
        del self._atexit
        if pending:
            raise exc_val
        return received and suppressed
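
# Illustrative sketch (not part of the original module); the filenames and
# the cleanup callback are hypothetical::
#
#   with ctxmanager(lambda: open('a'), lambda: open('b')) as c:
#       fa, fb = c.enter()
#       c.atexit(cleanup)  # runs on exit, before the files are closed
#       ...
#
# Both files are closed in reverse order when the 'with' block ends, even
# if an exception is raised part-way through.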

# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # Only expose the engine under a bundle spec name if it
            # declared an external facing one.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle name isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
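
# Illustrative sketch (not part of the original module): once engines are
# registered below, callers look them up through this global instance::
#
#   engine = compengines.forbundlename('gzip')   # by bundle spec name
#   engine = compengines.forbundletype('GZ')     # by internal identifier
#   names = compengines.supportedbundlenames     # e.g. {'gzip', 'bzip2', ...}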

class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines
        relying on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings
        of the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to
        ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also
        implement ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
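
# Illustrative sketch (not part of the original module): a minimal
# revlog-only engine would subclass compressionengine and implement just
# name(), revlogheader() and revlogcompressor(); all names below are
# hypothetical::
#
#   class _myengine(compressionengine):
#       def name(self):
#           return 'myengine'
#       def revlogheader(self):
#           return '\x99'          # must be unique among engines
#       def revlogcompressor(self, opts=None):
#           return mycompressor()  # object with compress()/decompress()
#
#   compengines.register(_myengine())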

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through the generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
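
# Illustrative sketch (not part of the original module): a round trip
# through the zlib engine's streaming interface; io.BytesIO stands in for
# a real file object::
#
#   import io
#   zl = compengines['zlib']
#   compressed = ''.join(zl.compressstream(iter(['some ', 'data'])))
#   fh = zl.decompressorreader(io.BytesIO(compressed))
#   fh.read(9)  # -> 'some data'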

class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled
    # elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
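
# Illustrative sketch (not part of the original module); 'somedata' is a
# hypothetical byte string::
#
#   c = compengines['zstd'].revlogcompressor()
#   blob = c.compress(somedata)  # None when compression doesn't pay off
#   if blob is not None:
#       assert c.decompress(blob) == somedata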

def bundlecompressiontopics():
    """Obtains a dict of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc

        items[bt[0]] = value

    return items

# convenient shortcut
dst = debugstacktrace