doctest: do not embed non-ascii characters in docstring...
Yuya Nishihara
r34138:414a3513 default
@@ -1,585 +1,586
# encoding.py - character transcoding support for Mercurial
#
# Copyright 2005-2009 Matt Mackall <mpm@selenic.com> and others
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import io
import locale
import os
import unicodedata

from . import (
    error,
    policy,
    pycompat,
)

from .pure import (
    charencode as charencodepure,
)

charencode = policy.importmod(r'charencode')

isasciistr = charencode.isasciistr
asciilower = charencode.asciilower
asciiupper = charencode.asciiupper
_jsonescapeu8fast = charencode.jsonescapeu8fast

_sysstr = pycompat.sysstr

if pycompat.ispy3:
    unichr = chr

# These unicode characters are ignored by HFS+ (Apple Technote 1150,
# "Unicode Subtleties"), so we need to ignore them in some places for
# sanity.
_ignore = [unichr(int(x, 16)).encode("utf-8") for x in
           "200c 200d 200e 200f 202a 202b 202c 202d 202e "
           "206a 206b 206c 206d 206e 206f feff".split()]
# verify the next function will work
assert all(i.startswith(("\xe2", "\xef")) for i in _ignore)

def hfsignoreclean(s):
    """Remove codepoints ignored by HFS+ from s.

    >>> hfsignoreclean(u'.h\u200cg'.encode('utf-8'))
    '.hg'
    >>> hfsignoreclean(u'.h\ufeffg'.encode('utf-8'))
    '.hg'
    """
    if "\xe2" in s or "\xef" in s:
        for c in _ignore:
            s = s.replace(c, '')
    return s

# encoding.environ is provided read-only, and may not be used to modify
# the process environment
_nativeenviron = (not pycompat.ispy3 or os.supports_bytes_environ)
if not pycompat.ispy3:
    environ = os.environ  # re-exports
elif _nativeenviron:
    environ = os.environb  # re-exports
else:
    # preferred encoding isn't known yet; use utf-8 to avoid unicode error
    # and recreate it once encoding is settled
    environ = dict((k.encode(u'utf-8'), v.encode(u'utf-8'))
                   for k, v in os.environ.items())  # re-exports

_encodingfixers = {
    '646': lambda: 'ascii',
    'ANSI_X3.4-1968': lambda: 'ascii',
}

try:
    encoding = environ.get("HGENCODING")
    if not encoding:
        encoding = locale.getpreferredencoding().encode('ascii') or 'ascii'
        encoding = _encodingfixers.get(encoding, lambda: encoding)()
except locale.Error:
    encoding = 'ascii'
encodingmode = environ.get("HGENCODINGMODE", "strict")
fallbackencoding = 'ISO-8859-1'

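The lookup order above is worth spelling out: an explicit HGENCODING always wins; otherwise the locale's preferred encoding is used, with a couple of historical aliases normalized and 'ascii' as the last resort. A standalone Python 3 sketch of that resolution (hypothetical helper name, no Mercurial imports):

    import locale
    import os

    # Illustrative sketch of the resolution order above (assumption:
    # plain Python 3; the names here are hypothetical).
    _fixers = {'646': 'ascii', 'ANSI_X3.4-1968': 'ascii'}

    def resolve_encoding(env=os.environ):
        enc = env.get('HGENCODING')              # explicit override wins
        if not enc:
            try:
                enc = locale.getpreferredencoding() or 'ascii'
            except locale.Error:
                return 'ascii'                   # broken locale: last resort
            enc = _fixers.get(enc, enc)          # normalize known aliases
        return enc

    print(resolve_encoding({}))                  # e.g. 'UTF-8' on most systems
    print(resolve_encoding({'HGENCODING': 'latin-1'}))  # 'latin-1'
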
class localstr(bytes):
    '''This class allows strings that are unmodified to be
    round-tripped to the local encoding and back'''
    def __new__(cls, u, l):
        s = bytes.__new__(cls, l)
        s._utf8 = u
        return s
    def __hash__(self):
        return hash(self._utf8)  # avoid collisions in local string space

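The trick is that a localstr *is* the locally encoded bytes, while the known-good UTF-8 form rides along as an attribute and drives hashing. A minimal standalone sketch of the same idea (assumption: Python 3; this is not the actual class above):

    class CachedLocal(bytes):
        def __new__(cls, utf8, local):
            s = bytes.__new__(cls, local)  # the value *is* the local bytes
            s._utf8 = utf8                 # the UTF-8 original rides along
            return s
        def __hash__(self):
            return hash(self._utf8)        # hash in UTF-8 space, not local

    s = CachedLocal('caf\u00e9'.encode('utf-8'), b'caf?')
    print(bytes(s))   # b'caf?'        -- what the local encoding can show
    print(s._utf8)    # b'caf\xc3\xa9' -- lossless original for round-trips
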
def tolocal(s):
    """
    Convert a string from internal UTF-8 to local encoding

    All internal strings should be UTF-8 but some repos before the
    implementation of locale support may contain latin1 or possibly
    other character sets. We attempt to decode everything strictly
    using UTF-8, then Latin-1, and failing that, we use UTF-8 and
    replace unknown characters.

    The localstr class is used to cache the known UTF-8 encoding of
    strings next to their local representation to allow lossless
    round-trip conversion back to UTF-8.

    >>> u = b'foo: \\xc3\\xa4' # utf-8
    >>> l = tolocal(u)
    >>> l
    'foo: ?'
    >>> fromlocal(l)
    'foo: \\xc3\\xa4'
    >>> u2 = b'foo: \\xc3\\xa1'
    >>> d = { l: 1, tolocal(u2): 2 }
    >>> len(d) # no collision
    2
    >>> b'foo: ?' in d
    False
    >>> l1 = b'foo: \\xe4' # historical latin1 fallback
    >>> l = tolocal(l1)
    >>> l
    'foo: ?'
    >>> fromlocal(l) # magically in utf-8
    'foo: \\xc3\\xa4'
    """

    if isasciistr(s):
        return s

    try:
        try:
            # make sure string is actually stored in UTF-8
            u = s.decode('UTF-8')
            if encoding == 'UTF-8':
                # fast path
                return s
            r = u.encode(_sysstr(encoding), u"replace")
            if u == r.decode(_sysstr(encoding)):
                # r is a safe, non-lossy encoding of s
                return r
            return localstr(s, r)
        except UnicodeDecodeError:
            # we should only get here if we're looking at an ancient changeset
            try:
                u = s.decode(_sysstr(fallbackencoding))
                r = u.encode(_sysstr(encoding), u"replace")
                if u == r.decode(_sysstr(encoding)):
                    # r is a safe, non-lossy encoding of s
                    return r
                return localstr(u.encode('UTF-8'), r)
            except UnicodeDecodeError:
                u = s.decode("utf-8", "replace")  # last ditch
                # can't round-trip
                return u.encode(_sysstr(encoding), u"replace")
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")

def fromlocal(s):
    """
    Convert a string from the local character encoding to UTF-8

    We attempt to decode strings using the encoding mode set by
    HGENCODINGMODE, which defaults to 'strict'. In this mode, unknown
    characters will cause an error message. Other modes include
    'replace', which replaces unknown characters with a special
    Unicode character, and 'ignore', which drops the character.
    """

    # can we do a lossless round-trip?
    if isinstance(s, localstr):
        return s._utf8
    if isasciistr(s):
        return s

    try:
        u = s.decode(_sysstr(encoding), _sysstr(encodingmode))
        return u.encode("utf-8")
    except UnicodeDecodeError as inst:
        sub = s[max(0, inst.start - 10):inst.start + 10]
        raise error.Abort("decoding near '%s': %s!" % (sub, inst))
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")

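The three HGENCODINGMODE values map directly onto the standard codec error handlers that bytes.decode accepts, so their behaviour can be demonstrated without Mercurial at all (standalone Python 3 sketch):

    raw = b'caf\xe9'  # latin-1 bytes, invalid as UTF-8

    try:
        raw.decode('utf-8', 'strict')
    except UnicodeDecodeError as e:
        print('strict:', e.reason)                  # unknown byte is an error

    print(ascii(raw.decode('utf-8', 'replace')))    # 'caf\ufffd' -- U+FFFD marker
    print(raw.decode('utf-8', 'ignore'))            # 'caf'       -- byte dropped
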
def unitolocal(u):
    """Convert a unicode string to a byte string of local encoding"""
    return tolocal(u.encode('utf-8'))

def unifromlocal(s):
    """Convert a byte string of local encoding to a unicode string"""
    return fromlocal(s).decode('utf-8')

def unimethod(bytesfunc):
    """Create a proxy method that forwards __unicode__() and __str__() of
    Python 3 to __bytes__()"""
    def unifunc(obj):
        return unifromlocal(bytesfunc(obj))
    return unifunc

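A standalone sketch of the proxy-method pattern unimethod() implements (assumption: Python 3, with a plain UTF-8 decode standing in for unifromlocal; the class and names are hypothetical):

    def str_via_bytes(bytesfunc):
        def unifunc(obj):
            return bytesfunc(obj).decode('utf-8')  # real code: unifromlocal()
        return unifunc

    class Node(object):
        def __bytes__(self):
            return b'node:badc0ffee'
        __str__ = str_via_bytes(__bytes__)  # str(Node()) reuses __bytes__

    print(str(Node()))  # 'node:badc0ffee'
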
# converter functions between native str and byte string. use these if the
# character encoding is not known (e.g. exception messages) or is known to
# be locale dependent (e.g. date formatting)
if pycompat.ispy3:
    strtolocal = unitolocal
    strfromlocal = unifromlocal
    strmethod = unimethod
else:
    strtolocal = pycompat.identity
    strfromlocal = pycompat.identity
    strmethod = pycompat.identity

if not _nativeenviron:
    # now encoding and helper functions are available, recreate the environ
    # dict to be exported to other modules
    environ = dict((tolocal(k.encode(u'utf-8')), tolocal(v.encode(u'utf-8')))
                   for k, v in os.environ.items())  # re-exports

# How to treat ambiguous-width characters. Set to 'wide' to treat as wide.
_wide = _sysstr(environ.get("HGENCODINGAMBIGUOUS", "narrow") == "wide"
                and "WFA" or "WF")

def colwidth(s):
    "Find the column width of a string for display in the local encoding"
    return ucolwidth(s.decode(_sysstr(encoding), u'replace'))

def ucolwidth(d):
    "Find the column width of a Unicode string for display"
    eaw = getattr(unicodedata, 'east_asian_width', None)
    if eaw is not None:
        return sum([eaw(c) in _wide and 2 or 1 for c in d])
    return len(d)

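east_asian_width() classifies each codepoint as 'W' (wide), 'F' (fullwidth), 'A' (ambiguous), 'Na', 'H' or 'N'; any category in _wide counts as two terminal columns. A standalone Python 3 sketch of the same rule:

    import unicodedata

    def ucolwidth_sketch(text, ambiguous_is_wide=False):
        wide = 'WFA' if ambiguous_is_wide else 'WF'
        return sum(2 if unicodedata.east_asian_width(c) in wide else 1
                   for c in text)

    print(ucolwidth_sketch('abc'))           # 3
    print(ucolwidth_sketch('\u3042\u3044'))  # 4 -- two wide hiragana
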
def getcols(s, start, c):
    '''Use colwidth to find a c-column substring of s starting at byte
    index start'''
    for x in xrange(start + c, len(s)):
        t = s[start:x]
        if colwidth(t) == c:
            return t

def trim(s, width, ellipsis='', leftside=False):
    """Trim string 's' to at most 'width' columns (including 'ellipsis').

    If 'leftside' is True, left side of string 's' is trimmed.
    'ellipsis' is always placed at trimmed side.

+   >>> from .node import bin
    >>> ellipsis = b'+++'
    >>> from . import encoding
    >>> encoding.encoding = b'utf-8'
    >>> t = b'1234567890'
    >>> print trim(t, 12, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 10, ellipsis=ellipsis)
    1234567890
    >>> print trim(t, 8, ellipsis=ellipsis)
    12345+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++67890
    >>> print trim(t, 8)
    12345678
    >>> print trim(t, 8, leftside=True)
    34567890
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    >>> u = u'\u3042\u3044\u3046\u3048\u304a' # 2 x 5 = 10 columns
    >>> t = u.encode(pycompat.sysstr(encoding.encoding))
    >>> print trim(t, 12, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 10, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84\xe3\x81\x86\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 8, ellipsis=ellipsis)
    \xe3\x81\x82\xe3\x81\x84+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 5)
    \xe3\x81\x82\xe3\x81\x84
    >>> print trim(t, 5, leftside=True)
    \xe3\x81\x88\xe3\x81\x8a
    >>> print trim(t, 4, ellipsis=ellipsis)
    +++
    >>> print trim(t, 4, ellipsis=ellipsis, leftside=True)
    +++
-   >>> t = b'\x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa' # invalid byte sequence
+   >>> t = bin(b'112233445566778899aa') # invalid byte sequence
    >>> print trim(t, 12, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 10, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 8, ellipsis=ellipsis)
    \x11\x22\x33\x44\x55+++
    >>> print trim(t, 8, ellipsis=ellipsis, leftside=True)
    +++\x66\x77\x88\x99\xaa
    >>> print trim(t, 8)
    \x11\x22\x33\x44\x55\x66\x77\x88
    >>> print trim(t, 8, leftside=True)
    \x33\x44\x55\x66\x77\x88\x99\xaa
    >>> print trim(t, 3, ellipsis=ellipsis)
    +++
    >>> print trim(t, 1, ellipsis=ellipsis)
    +
    """
    try:
        u = s.decode(_sysstr(encoding))
    except UnicodeDecodeError:
        if len(s) <= width:  # trimming is not needed
            return s
        width -= len(ellipsis)
        if width <= 0:  # not enough room even for ellipsis
            return ellipsis[:width + len(ellipsis)]
        if leftside:
            return ellipsis + s[-width:]
        return s[:width] + ellipsis

    if ucolwidth(u) <= width:  # trimming is not needed
        return s

    width -= len(ellipsis)
    if width <= 0:  # not enough room even for ellipsis
        return ellipsis[:width + len(ellipsis)]

    if leftside:
        uslice = lambda i: u[i:]
        concat = lambda s: ellipsis + s
    else:
        uslice = lambda i: u[:-i]
        concat = lambda s: s + ellipsis
    for i in xrange(1, len(u)):
        usub = uslice(i)
        if ucolwidth(usub) <= width:
            return concat(usub.encode(_sysstr(encoding)))
    return ellipsis  # not enough room for multi-column characters

def lower(s):
    "best-effort encoding-aware case-folding of local string s"
    try:
        return asciilower(s)
    except UnicodeDecodeError:
        pass
    try:
        if isinstance(s, localstr):
            u = s._utf8.decode("utf-8")
        else:
            u = s.decode(_sysstr(encoding), _sysstr(encodingmode))

        lu = u.lower()
        if u == lu:
            return s  # preserve localstring
        return lu.encode(_sysstr(encoding))
    except UnicodeError:
        return s.lower()  # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")

def upper(s):
    "best-effort encoding-aware case-folding of local string s"
    try:
        return asciiupper(s)
    except UnicodeDecodeError:
        return upperfallback(s)

def upperfallback(s):
    try:
        if isinstance(s, localstr):
            u = s._utf8.decode("utf-8")
        else:
            u = s.decode(_sysstr(encoding), _sysstr(encodingmode))

        uu = u.upper()
        if u == uu:
            return s  # preserve localstring
        return uu.encode(_sysstr(encoding))
    except UnicodeError:
        return s.upper()  # we don't know how to fold this except in ASCII
    except LookupError as k:
        raise error.Abort(k, hint="please check your locale settings")

class normcasespecs(object):
    '''what a platform's normcase does to ASCII strings

    This is specified per platform, and should be consistent with what normcase
    on that platform actually does.

    lower: normcase lowercases ASCII strings
    upper: normcase uppercases ASCII strings
    other: the fallback function should always be called

    This should be kept in sync with normcase_spec in util.h.'''
    lower = -1
    upper = 1
    other = 0

def jsonescape(s, paranoid=False):
    '''returns a string suitable for JSON

    JSON is problematic for us because it doesn't support non-Unicode
    bytes. To deal with this, we take the following approach:

    - localstr objects are converted back to UTF-8
    - valid UTF-8/ASCII strings are passed as-is
    - other strings are converted to UTF-8b surrogate encoding
    - apply JSON-specified string escaping

    (escapes are doubled in these tests)

    >>> jsonescape(b'this is a test')
    'this is a test'
    >>> jsonescape(b'escape characters: \\0 \\x0b \\x7f')
    'escape characters: \\\\u0000 \\\\u000b \\\\u007f'
    >>> jsonescape(b'escape characters: \\b \\t \\n \\f \\r \\" \\\\')
    'escape characters: \\\\b \\\\t \\\\n \\\\f \\\\r \\\\" \\\\\\\\'
    >>> jsonescape(b'a weird byte: \\xdd')
    'a weird byte: \\xed\\xb3\\x9d'
    >>> jsonescape(b'utf-8: caf\\xc3\\xa9')
    'utf-8: caf\\xc3\\xa9'
    >>> jsonescape(b'')
    ''

    If paranoid, non-ascii and common troublesome characters are also escaped.
    This is suitable for web output.

    >>> s = b'escape characters: \\0 \\x0b \\x7f'
    >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
    >>> s = b'escape characters: \\b \\t \\n \\f \\r \\" \\\\'
    >>> assert jsonescape(s) == jsonescape(s, paranoid=True)
    >>> jsonescape(b'escape boundary: \\x7e \\x7f \\xc2\\x80', paranoid=True)
    'escape boundary: ~ \\\\u007f \\\\u0080'
    >>> jsonescape(b'a weird byte: \\xdd', paranoid=True)
    'a weird byte: \\\\udcdd'
    >>> jsonescape(b'utf-8: caf\\xc3\\xa9', paranoid=True)
    'utf-8: caf\\\\u00e9'
    >>> jsonescape(b'non-BMP: \\xf0\\x9d\\x84\\x9e', paranoid=True)
    'non-BMP: \\\\ud834\\\\udd1e'
    >>> jsonescape(b'<foo@example.org>', paranoid=True)
    '\\\\u003cfoo@example.org\\\\u003e'
    '''

    u8chars = toutf8b(s)
    try:
        return _jsonescapeu8fast(u8chars, paranoid)
    except ValueError:
        pass
    return charencodepure.jsonescapeu8fallback(u8chars, paranoid)

_utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]

def getutf8char(s, pos):
    '''get the next full utf-8 character in the given string, starting at pos

    Raises a UnicodeError if the given location does not start a valid
    utf-8 character.
    '''

    # find how many bytes to attempt decoding from first nibble
    l = _utf8len[ord(s[pos]) >> 4]
    if not l:  # ascii
        return s[pos]

    c = s[pos:pos + l]
    # validate with attempted decode
    c.decode("utf-8")
    return c

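The table keys on the top four bits of the lead byte: nibbles 0x0-0x7 are ASCII (length 0 means "single byte, no decode needed"), 0x8-0xB are bare continuation bytes (length 1, which the validating decode then rejects), 0xC-0xD start 2-byte sequences, 0xE 3-byte, and 0xF 4-byte. A standalone Python 3 sketch (operating on ints rather than py2 str):

    _utf8len = [0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 3, 4]

    for byte in (0x41, 0x92, 0xc3, 0xe3, 0xf0):
        print(hex(byte), '->', _utf8len[byte >> 4])
    # 0x41 -> 0  ascii, single byte
    # 0x92 -> 1  bare continuation byte; the validating decode will raise
    # 0xc3 -> 2  two-byte sequence (e.g. U+00E9)
    # 0xe3 -> 3  three-byte sequence (e.g. hiragana)
    # 0xf0 -> 4  four-byte sequence (non-BMP)
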
def toutf8b(s):
    '''convert a local, possibly-binary string into UTF-8b

    This is intended as a generic method to preserve data when working
    with schemes like JSON and XML that have no provision for
    arbitrary byte strings. As Mercurial often doesn't know
    what encoding data is in, we use so-called UTF-8b.

    If a string is already valid UTF-8 (or ASCII), it passes unmodified.
    Otherwise, unsupported bytes are mapped to UTF-16 surrogate range,
    uDC00-uDCFF.

    Principles of operation:

    - ASCII and UTF-8 data successfully round-trips and is understood
      by Unicode-oriented clients
    - filenames and file contents in arbitrary other encodings can
      be round-tripped or recovered by clueful clients
    - local strings that have a cached known UTF-8 encoding (aka
      localstr) get sent as UTF-8 so Unicode-oriented clients get the
      Unicode data they want
    - because we must preserve UTF-8 bytestring in places such as
      filenames, metadata can't be roundtripped without help

    (Note: "UTF-8b" often refers to decoding a mix of valid UTF-8 and
    arbitrary bytes into an internal Unicode format that can be
    re-encoded back into the original. Here we are exposing the
    internal surrogate encoding as a UTF-8 string.)
    '''

    if not isinstance(s, localstr) and isasciistr(s):
        return s
    if "\xed" not in s:
        if isinstance(s, localstr):
            return s._utf8
        try:
            s.decode('utf-8')
            return s
        except UnicodeDecodeError:
            pass

    r = ""
    pos = 0
    l = len(s)
    while pos < l:
        try:
            c = getutf8char(s, pos)
            if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
                # have to re-escape existing U+DCxx characters
                c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
                pos += 1
            else:
                pos += len(c)
        except UnicodeDecodeError:
            c = unichr(0xdc00 + ord(s[pos])).encode('utf-8')
            pos += 1
        r += c
    return r

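This module builds the surrogate mapping by hand because it works on py2 byte strings, but Python 3 ships the same idea as the 'surrogateescape' codec error handler, which makes the trick easy to demonstrate standalone:

    raw = b'caf\xe9 + \xc3\xa9'          # latin-1 byte mixed with real UTF-8

    text = raw.decode('utf-8', 'surrogateescape')
    print(ascii(text))                   # stray \xe9 became U+DCE9

    back = text.encode('utf-8', 'surrogateescape')
    print(back == raw)                   # True -- lossless round-trip
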
def fromutf8b(s):
    '''Given a UTF-8b string, return a local, possibly-binary string.

    Return the original binary string. This
    is a round-trip process for strings like filenames, but metadata
    that was passed through tolocal will remain in UTF-8.

    >>> roundtrip = lambda x: fromutf8b(toutf8b(x)) == x
    >>> m = b"\\xc3\\xa9\\x99abcd"
    >>> toutf8b(m)
    '\\xc3\\xa9\\xed\\xb2\\x99abcd'
    >>> roundtrip(m)
    True
    >>> roundtrip(b"\\xc2\\xc2\\x80")
    True
    >>> roundtrip(b"\\xef\\xbf\\xbd")
    True
    >>> roundtrip(b"\\xef\\xef\\xbf\\xbd")
    True
    >>> roundtrip(b"\\xf1\\x80\\x80\\x80\\x80")
    True
    '''

    if isasciistr(s):
        return s
    # fast path - look for uDxxx prefixes in s
    if "\xed" not in s:
        return s

    # We could do this with the unicode type but some Python builds
    # use UTF-16 internally (issue5031) which causes non-BMP code
    # points to be escaped. Instead, we use our handy getutf8char
    # helper again to walk the string without "decoding" it.

    r = ""
    pos = 0
    l = len(s)
    while pos < l:
        c = getutf8char(s, pos)
        pos += len(c)
        # unescape U+DCxx characters
        if "\xed\xb0\x80" <= c <= "\xed\xb3\xbf":
            c = chr(ord(c.decode("utf-8")) & 0xff)
        r += c
    return r

if pycompat.ispy3:
    class strio(io.TextIOWrapper):
        """Wrapper around TextIOWrapper that respects hg's encoding assumptions.

        Also works around Python closing streams.
        """

        def __init__(self, buffer):
            super(strio, self).__init__(buffer, encoding=_sysstr(encoding))

        def __del__(self):
            """Override __del__ so it doesn't close the underlying stream."""
else:
    strio = pycompat.identity

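On Python 3, strio above is a TextIOWrapper pinned to hg's detected encoding whose destructor deliberately does nothing, so garbage-collecting the wrapper never closes the wrapped stdio buffer. A standalone sketch of the same pattern (assumption: Python 3, with io.BytesIO standing in for a real stdio buffer and a fixed 'utf-8' encoding):

    import io

    class KeepOpenTextIO(io.TextIOWrapper):
        def __init__(self, buffer):
            super().__init__(buffer, encoding='utf-8')
        def __del__(self):
            """Don't close the wrapped stream on garbage collection."""

    buf = io.BytesIO()
    w = KeepOpenTextIO(buf)
    w.write('caf\u00e9\n')      # text in, encoded bytes out
    w.flush()
    print(buf.getvalue())       # b'caf\xc3\xa9\n' -- buffer still open
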
@@ -1,575 +1,575
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import errno
import hashlib
import os
import stat

from .i18n import _
from . import (
    error,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)

parsers = policy.importmod(r'parsers')

# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def _encodedir(path):
    '''
    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    '''
    return (path
            .replace(".hg/", ".hg.hg/")
            .replace(".i/", ".i.hg/")
            .replace(".d/", ".d.hg/"))

encodedir = getattr(parsers, 'encodedir', _encodedir)

def decodedir(path):
    '''
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" not in path:
        return path
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))

def _reserved():
    ''' characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by the encode functions
    '''
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    for x in range(32):
        yield x
    for x in range(126, 256):
        yield x
    for x in winreserved:
        yield x

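A quick standalone way to see the shape of that set (Python 3 sketch; iterating a bytes literal yields ints, matching the ordinals the generator produces):

    def reserved():
        for x in range(32):            # ASCII control characters
            yield x
        for x in range(126, 256):      # '~' and all high bytes
            yield x
        for x in b'\\:*?"<>|':         # Windows-special punctuation
            yield x

    r = set(reserved())
    print(len(r))                      # 170 distinct code points
    print(b'~'[0] in r, b'a'[0] in r)  # True False
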
def _buildencodefun():
    '''
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

-   >>> enc(b'the\x07quick\xADshot')
+   >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord("A"), ord("Z") + 1))

    cmap = dict((x, x) for x in asciistr)
    for x in _reserved():
        cmap[xchr(x)] = "~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    dmap = {}
    for k, v in cmap.iteritems():
        dmap[v] = k
    def decode(s):
        i = 0
        while i < len(s):
            for l in xrange(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError
    return (lambda s: ''.join([cmap[s[c:c + 1]] for c in xrange(len(s))]),
            lambda s: ''.join(list(decode(s))))

_encodefname, _decodefname = _buildencodefun()

def encodefilename(s):
    '''
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    return _encodefname(encodedir(s))

def decodefilename(s):
    '''
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    return decodedir(_decodefname(s))

def _buildlowerencodefun():
    '''
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
-   >>> f(b'the\x07quick\xADshot')
+   >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    '''
    cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])

lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = ('aux', 'con', 'prn', 'nul')  # length 3
_winres4 = ('com', 'lpt')                # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    '''
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in '. ':
            n = "~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find('.')
            if l == -1:
                l = len(n)
            if ((l == 3 and n[:3] in _winres3) or
                (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
                 and n[:3] in _winres4)):
                # encode third letter ('aux' -> 'au~78')
                ec = "~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + "~%02x" % ord(n[-1:])
    return path

_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    digest = hashlib.sha1(path).hexdigest()
    le = lowerencode(path[5:]).split('/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in '. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + '_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = '/'.join(sdirs)
    if len(dirs) > 0:
        dirs += '/'
    res = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = 'dh/' + dirs + filler + digest + ext
    return res

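A standalone approximation of the shape _hashencode produces (assumption: Python 3; the real code also runs lowerencode and _auxencode over each segment, which this sketch skips, so only the structure is faithful):

    import hashlib
    import os.path

    def hashed_shape(path, maxlen=120, dirprefix=8, maxdirs=68):
        digest = hashlib.sha1(path).hexdigest()
        parts = path[5:].decode().split('/')       # drop 'data/' prefix
        root, ext = os.path.splitext(parts[-1])
        dirs, used = [], 0
        for p in parts[:-1]:
            d = p[:dirprefix]                      # 8-char directory prefix
            t = len(d) if not used else used + 1 + len(d)
            if used and t > maxdirs:
                break                              # no more levels fit
            dirs.append(d)
            used = t
        prefix = '/'.join(dirs) + ('/' if dirs else '')
        res = 'dh/' + prefix + digest + ext
        room = maxlen - len(res)
        if room > 0:                               # pad with basename filler
            res = 'dh/' + prefix + parts[-1][:room] + digest + ext
        return res

    p = b'data/' + b'very-long-directory-name/' * 6 + b'changeset.i'
    print(hashed_shape(p))  # e.g. 'dh/very-lon/very-lon/.../changeset.i<sha1>.i'
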
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    ef = _encodefname(path).split('/')
    res = '/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res

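The switch between the two regimes can be demonstrated standalone (Python 3 sketch; only the uppercase '_x' rule of the reversible encoding is modelled, and the hashed form is reduced to its 'dh/' + sha1 skeleton):

    import hashlib

    MAXLEN = 120

    def default_encode(path):
        return ''.join('_' + c.lower() if c.isupper() else c for c in path)

    def hybrid(path):
        res = default_encode(path)
        if len(res) <= MAXLEN:
            return res                   # reversible form fits
        return 'dh/' + hashlib.sha1(path.encode()).hexdigest() + '.i'

    print(hybrid('data/FOO.i'))                     # data/_f_o_o.i
    print(hybrid('data/' + 'X' * 200 + '.i')[:12])  # 'dh/' + sha1 prefix
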
def _pathencode(path):
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split('/')
    res = '/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res

_pathencode = getattr(parsers, 'pathencode', _pathencode)

def _plainhybridencode(f):
    return _hybridencode(f, False)

def _calcmode(vfs):
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode

_data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''yields (unencoded, encoded, size)'''
        path = self.path
        if relpath:
            path += '/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + '/' + f
                    if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        l.sort()
        return l

    def datafiles(self):
        return self._walk('data', True) + self._walk('meta', True)

    def topfiles(self):
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        return ['requires'] + _data.split()

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        if not path.endswith("/"):
            path = path + "/"
        return self.vfs.exists(path)
390 return self.vfs.exists(path)
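
# Usage sketch (hypothetical paths; stores are normally built via store()
# at the bottom of this module): walk() yields (unencoded, encoded, size)
# triples for every revlog file, data/meta files first, e.g. a triple like
# ('data/foo.txt.i', 'data/foo.txt.i', 123) for a basicstore, where the
# unencoded and encoded names differ only for paths touched by encodedir().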

class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        for a, b, size in super(encodedstore, self).datafiles():
            try:
                a = decodefilename(a)
            except KeyError:
                a = None
            yield a, b, size

    def join(self, f):
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in _data.split()])

class fncache(object):
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        self.entries = set(decodedir(fp.read()).splitlines())
        if '' in self.entries:
            fp.seek(0)
            for n, line in enumerate(util.iterfile(fp)):
                if not line.rstrip('\n'):
                    t = _('invalid entry in fncache, line %d') % (n + 1)
                    raise error.Abort(t)
        fp.close()

    def write(self, tr):
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
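
# On-disk format sketch: .hg/store/fncache is a plain text file holding one
# store path per line (escaped with encodedir() when written), e.g.:
#
#   data/foo.txt.i
#   data/bar/baz.c.i
#
# An empty line is corruption, which the extra verification pass in _load()
# reports via error.Abort.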

class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        if mode not in ('r', 'rb') and (path.startswith('data/') or
                                        path.startswith('meta/')):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)
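
# Behavior sketch (hypothetical call): opening a data/ or meta/ file with
# any write mode first registers the unencoded path in the fncache, then
# delegates to the wrapped vfs under the encoded name:
#
#   vfs('data/Some File.txt.i', mode='wb')
#   # -> fncache now contains 'data/Some File.txt.i'
#   # -> the actual file opened is the hybrid/hash-encoded path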

class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                yield f, ef, self.getsize(ef)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    def copylist(self):
        d = ('data meta dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # check for files (exact match)
        e = path + '.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False

def store(requirements, path, vfstype):
    if 'store' in requirements:
        if 'fncache' in requirements:
            return fncachestore(path, vfstype, 'dotencode' in requirements)
        return encodedstore(path, vfstype)
    return basicstore(path, vfstype)
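
# Selection sketch: the requirements set comes from the repo's .hg/requires
# file, so (assuming the standard requirement names):
#
#   {'store', 'fncache', 'dotencode'} -> fncachestore, dotencode=True
#   {'store', 'fncache'}              -> fncachestore, dotencode=False
#   {'store'}                         -> encodedstore
#   (neither)                         -> basicstore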