##// END OF EJS Templates
store: document the `walk` method...
marmoute -
r47612:eed3e2b7 default
parent child Browse files
Show More
@@ -1,754 +1,756
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import errno
10 import errno
11 import functools
11 import functools
12 import os
12 import os
13 import stat
13 import stat
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .node import hex
17 from .node import hex
18 from . import (
18 from . import (
19 changelog,
19 changelog,
20 error,
20 error,
21 manifest,
21 manifest,
22 policy,
22 policy,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 vfs as vfsmod,
25 vfs as vfsmod,
26 )
26 )
27 from .utils import hashutil
27 from .utils import hashutil
28
28
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
33
33
34
34
35 def _matchtrackedpath(path, matcher):
35 def _matchtrackedpath(path, matcher):
36 """parses a fncache entry and returns whether the entry is tracking a path
36 """parses a fncache entry and returns whether the entry is tracking a path
37 matched by matcher or not.
37 matched by matcher or not.
38
38
39 If matcher is None, returns True"""
39 If matcher is None, returns True"""
40
40
41 if matcher is None:
41 if matcher is None:
42 return True
42 return True
43 path = decodedir(path)
43 path = decodedir(path)
44 if path.startswith(b'data/'):
44 if path.startswith(b'data/'):
45 return matcher(path[len(b'data/') : -len(b'.i')])
45 return matcher(path[len(b'data/') : -len(b'.i')])
46 elif path.startswith(b'meta/'):
46 elif path.startswith(b'meta/'):
47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48
48
49 raise error.ProgrammingError(b"cannot decode path %s" % path)
49 raise error.ProgrammingError(b"cannot decode path %s" % path)
50
50
51
51
52 # This avoids a collision between a file named foo and a dir named
52 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
53 # foo.i or foo.d
54 def _encodedir(path):
54 def _encodedir(path):
55 """
55 """
56 >>> _encodedir(b'data/foo.i')
56 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
57 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
58 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
59 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
61 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 """
64 """
65 return (
65 return (
66 path.replace(b".hg/", b".hg.hg/")
66 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
67 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
68 .replace(b".d/", b".d.hg/")
69 )
69 )
70
70
71
71
# use the implementation from the parsers module when it provides one,
# otherwise fall back to the Python version above
encodedir = getattr(parsers, 'encodedir', _encodedir)
73
73
74
74
def decodedir(path):
    """Reverse the directory escaping done by encodedir

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: every escaped directory contains ".hg/"
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )
91
91
92
92
93 def _reserved():
93 def _reserved():
94 """characters that are problematic for filesystems
94 """characters that are problematic for filesystems
95
95
96 * ascii escapes (0..31)
96 * ascii escapes (0..31)
97 * ascii hi (126..255)
97 * ascii hi (126..255)
98 * windows specials
98 * windows specials
99
99
100 these characters will be escaped by encodefunctions
100 these characters will be escaped by encodefunctions
101 """
101 """
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
103 for x in range(32):
104 yield x
104 yield x
105 for x in range(126, 256):
105 for x in range(126, 256):
106 yield x
106 yield x
107 for x in winreserved:
107 for x in winreserved:
108 yield x
108 yield x
109
109
110
110
def _buildencodefun():
    """Build the reversible filename encoder and its decoder

    Returns an ``(encode, decode)`` pair of functions operating on bytes.
    Uppercase letters and '_' are encoded as '_' followed by the lowercase
    letter, reserved bytes (see _reserved) as '~xx' with xx the two digit
    hex value; every other ascii byte is kept as is.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encoding map: identity first, then override the special cases
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decoding map: encoded token (1 to 3 bytes long) -> original byte
    dmap = {}
    for k, v in pycompat.iteritems(cmap):
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            # try tokens of length 1, 2 then 3 starting at position i
            for l in pycompat.xrange(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no token matches: s is not a valid encoded name
                raise KeyError

    return (
        lambda s: b''.join(
            [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
        ),
        lambda s: b''.join(list(decode(s))),
    )
169
169
170
170
# module-level encoder/decoder pair, built once at import time
_encodefname, _decodefname = _buildencodefun()
172
172
173
173
def encodefilename(s):
    """Escape directory suffixes (encodedir), then encode the bytes of s

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))
180
180
181
181
def decodefilename(s):
    """Inverse of encodefilename: decode the bytes of s, then decodedir

    A KeyError raised by the byte decoder for an invalid name is not
    caught here.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))
188
188
189
189
def _buildlowerencodefun():
    """Build the lossy lowercasing encoder

    The returned function maps uppercase letters to lowercase and reserved
    bytes (see _reserved) to '~xx'; other ascii bytes are kept as is.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
213
213
214
214
# use the parsers implementation when present, else build the Python one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
220
220
221
221
222 def _auxencode(path, dotencode):
222 def _auxencode(path, dotencode):
223 """
223 """
224 Encodes filenames containing names reserved by Windows or which end in
224 Encodes filenames containing names reserved by Windows or which end in
225 period or space. Does not touch other single reserved characters c.
225 period or space. Does not touch other single reserved characters c.
226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
227 Additionally encodes space or period at the beginning, if dotencode is
227 Additionally encodes space or period at the beginning, if dotencode is
228 True. Parameter path is assumed to be all lowercase.
228 True. Parameter path is assumed to be all lowercase.
229 A segment only needs encoding if a reserved name appears as a
229 A segment only needs encoding if a reserved name appears as a
230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
231 doesn't need encoding.
231 doesn't need encoding.
232
232
233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
234 >>> _auxencode(s.split(b'/'), True)
234 >>> _auxencode(s.split(b'/'), True)
235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
237 >>> _auxencode(s.split(b'/'), False)
237 >>> _auxencode(s.split(b'/'), False)
238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
239 >>> _auxencode([b'foo. '], True)
239 >>> _auxencode([b'foo. '], True)
240 ['foo.~20']
240 ['foo.~20']
241 >>> _auxencode([b' .foo'], True)
241 >>> _auxencode([b' .foo'], True)
242 ['~20.foo']
242 ['~20.foo']
243 """
243 """
244 for i, n in enumerate(path):
244 for i, n in enumerate(path):
245 if not n:
245 if not n:
246 continue
246 continue
247 if dotencode and n[0] in b'. ':
247 if dotencode and n[0] in b'. ':
248 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 n = b"~%02x" % ord(n[0:1]) + n[1:]
249 path[i] = n
249 path[i] = n
250 else:
250 else:
251 l = n.find(b'.')
251 l = n.find(b'.')
252 if l == -1:
252 if l == -1:
253 l = len(n)
253 l = len(n)
254 if (l == 3 and n[:3] in _winres3) or (
254 if (l == 3 and n[:3] in _winres3) or (
255 l == 4
255 l == 4
256 and n[3:4] <= b'9'
256 and n[3:4] <= b'9'
257 and n[3:4] >= b'1'
257 and n[3:4] >= b'1'
258 and n[:3] in _winres4
258 and n[:3] in _winres4
259 ):
259 ):
260 # encode third letter ('aux' -> 'au~78')
260 # encode third letter ('aux' -> 'au~78')
261 ec = b"~%02x" % ord(n[2:3])
261 ec = b"~%02x" % ord(n[2:3])
262 n = n[0:2] + ec + n[3:]
262 n = n[0:2] + ec + n[3:]
263 path[i] = n
263 path[i] = n
264 if n[-1] in b'. ':
264 if n[-1] in b'. ':
265 # encode last period or space ('foo...' -> 'foo..~2e')
265 # encode last period or space ('foo...' -> 'foo..~2e')
266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
267 return path
267 return path
268
268
269
269
270 _maxstorepathlen = 120
270 _maxstorepathlen = 120
271 _dirprefixlen = 8
271 _dirprefixlen = 8
272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
273
273
274
274
def _hashencode(path, dotencode):
    """Return the non-reversible hashed store name ('dh/...') for path

    The result is 'dh/' + shortened directories + filler + sha1 hex digest
    of the full path + the extension of the basename. dotencode is passed
    through to _auxencode.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            # account for the '/' joining this level to the previous ones
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # use the remaining room for the beginning of the basename
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
305
305
306
306
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
344
344
345
345
def _pathencode(path):
    """Encode path like _hybridencode(path, True), with an early exit

    A path already longer than _maxstorepathlen before encoding goes
    straight to the hashed encoding.
    """
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
355
355
356
356
# prefer the implementation from the parsers module when it provides one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
358
358
359
359
def _plainhybridencode(f):
    """Hybrid-encode f without the leading period/space encoding"""
    return _hybridencode(f, False)
362
362
363
363
364 def _calcmode(vfs):
364 def _calcmode(vfs):
365 try:
365 try:
366 # files in .hg/ will be created using this mode
366 # files in .hg/ will be created using this mode
367 mode = vfs.stat().st_mode
367 mode = vfs.stat().st_mode
368 # avoid some useless chmods
368 # avoid some useless chmods
369 if (0o777 & ~util.umask) == (0o777 & mode):
369 if (0o777 & ~util.umask) == (0o777 & mode):
370 mode = None
370 mode = None
371 except OSError:
371 except OSError:
372 mode = None
372 mode = None
373 return mode
373 return mode
374
374
375
375
376 _data = [
376 _data = [
377 b'bookmarks',
377 b'bookmarks',
378 b'narrowspec',
378 b'narrowspec',
379 b'data',
379 b'data',
380 b'meta',
380 b'meta',
381 b'00manifest.d',
381 b'00manifest.d',
382 b'00manifest.i',
382 b'00manifest.i',
383 b'00changelog.d',
383 b'00changelog.d',
384 b'00changelog.i',
384 b'00changelog.i',
385 b'phaseroots',
385 b'phaseroots',
386 b'obsstore',
386 b'obsstore',
387 b'requires',
387 b'requires',
388 ]
388 ]
389
389
390 REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd')
390 REVLOG_FILES_EXT = (b'.i', b'.d', b'.n', b'.nd')
391
391
392
392
def isrevlog(f, kind, st):
    """Tell whether directory entry f is a regular file with a revlog suffix

    kind is the file type as compared against stat.S_IFREG; st is accepted
    for the file filter signature used by basicstore._walk but is not used.
    """
    if kind != stat.S_IFREG:
        return False
    return f.endswith(REVLOG_FILES_EXT)
397
397
398
398
class basicstore(object):
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs accesses names as given, vfs applies encodedir first
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the on-disk path of store file f"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, filefilter=isrevlog):
        '''yields (unencoded, encoded, size)

        Lists the files under relpath (relative to the store root) accepted
        by filefilter, descending into sub-directories when recurse is
        true. The result is a sorted list.'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    if filefilter(f, kind, st):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """return a changelog object reading through this store's vfs"""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """return a manifestlog object reading through this store's vfs"""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(self, matcher=None):
        """return the files found under data/ and meta/

        matcher is accepted for interface compatibility but not used here.
        """
        return self._walk(b'data', True) + self._walk(b'meta', True)

    def topfiles(self):
        """return the files found directly in the store root"""
        # yield manifest before changelog
        return reversed(self._walk(b'', False))

    def walk(self, matcher=None):
        """return files related to data storage (ie: revlogs)

        yields (unencoded, encoded, size)

        if a matcher is passed, it is forwarded to datafiles() so that only
        storage files of tracked paths matching it are yielded
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
488
490
489
491
class encodedstore(basicstore):
    """store kept in a 'store' sub-directory, with encoded file names"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs accesses names as given, vfs applies encodefilename first
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self, matcher=None):
        """yields (unencoded, encoded, size) for files under data/ and meta/

        The unencoded name is None when the on-disk name cannot be decoded;
        such entries are always yielded. Decodable entries are filtered
        through matcher (see _matchtrackedpath).
        """
        for a, b, size in super(encodedstore, self).datafiles():
            try:
                a = decodefilename(a)
            except KeyError:
                a = None
            if a is not None and not _matchtrackedpath(a, matcher):
                continue
            yield a, b, size

    def join(self, f):
        """return the on-disk path of store file f"""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
515
517
516
518
517 class fncache(object):
519 class fncache(object):
518 # the filename used to be partially encoded
520 # the filename used to be partially encoded
519 # hence the encodedir/decodedir dance
521 # hence the encodedir/decodedir dance
520 def __init__(self, vfs):
522 def __init__(self, vfs):
521 self.vfs = vfs
523 self.vfs = vfs
522 self.entries = None
524 self.entries = None
523 self._dirty = False
525 self._dirty = False
524 # set of new additions to fncache
526 # set of new additions to fncache
525 self.addls = set()
527 self.addls = set()
526
528
def ensureloaded(self, warn=None):
    """read the fncache file if not already read.

    If the file on disk is corrupted, raise. If warn is provided,
    warn and keep going instead."""
    if self.entries is None:
        self._load(warn)
534
536
535 def _load(self, warn=None):
537 def _load(self, warn=None):
536 '''fill the entries from the fncache file'''
538 '''fill the entries from the fncache file'''
537 self._dirty = False
539 self._dirty = False
538 try:
540 try:
539 fp = self.vfs(b'fncache', mode=b'rb')
541 fp = self.vfs(b'fncache', mode=b'rb')
540 except IOError:
542 except IOError:
541 # skip nonexistent file
543 # skip nonexistent file
542 self.entries = set()
544 self.entries = set()
543 return
545 return
544
546
545 self.entries = set()
547 self.entries = set()
546 chunk = b''
548 chunk = b''
547 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
549 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
548 chunk += c
550 chunk += c
549 try:
551 try:
550 p = chunk.rindex(b'\n')
552 p = chunk.rindex(b'\n')
551 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
553 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
552 chunk = chunk[p + 1 :]
554 chunk = chunk[p + 1 :]
553 except ValueError:
555 except ValueError:
554 # substring '\n' not found, maybe the entry is bigger than the
556 # substring '\n' not found, maybe the entry is bigger than the
555 # chunksize, so let's keep iterating
557 # chunksize, so let's keep iterating
556 pass
558 pass
557
559
558 if chunk:
560 if chunk:
559 msg = _(b"fncache does not ends with a newline")
561 msg = _(b"fncache does not ends with a newline")
560 if warn:
562 if warn:
561 warn(msg + b'\n')
563 warn(msg + b'\n')
562 else:
564 else:
563 raise error.Abort(
565 raise error.Abort(
564 msg,
566 msg,
565 hint=_(
567 hint=_(
566 b"use 'hg debugrebuildfncache' to "
568 b"use 'hg debugrebuildfncache' to "
567 b"rebuild the fncache"
569 b"rebuild the fncache"
568 ),
570 ),
569 )
571 )
570 self._checkentries(fp, warn)
572 self._checkentries(fp, warn)
571 fp.close()
573 fp.close()
572
574
573 def _checkentries(self, fp, warn):
575 def _checkentries(self, fp, warn):
574 """ make sure there is no empty string in entries """
576 """ make sure there is no empty string in entries """
575 if b'' in self.entries:
577 if b'' in self.entries:
576 fp.seek(0)
578 fp.seek(0)
577 for n, line in enumerate(util.iterfile(fp)):
579 for n, line in enumerate(util.iterfile(fp)):
578 if not line.rstrip(b'\n'):
580 if not line.rstrip(b'\n'):
579 t = _(b'invalid entry in fncache, line %d') % (n + 1)
581 t = _(b'invalid entry in fncache, line %d') % (n + 1)
580 if warn:
582 if warn:
581 warn(t + b'\n')
583 warn(t + b'\n')
582 else:
584 else:
583 raise error.Abort(t)
585 raise error.Abort(t)
584
586
def write(self, tr):
    """write pending changes to the fncache file, within transaction tr

    When an entry was removed (dirty), the whole file is rewritten with
    the loaded entries plus the pending additions. Otherwise pending
    additions are appended, after which the in-memory entries are dropped.
    """
    if self._dirty:
        assert self.entries is not None
        self.entries = self.entries | self.addls
        self.addls = set()
        tr.addbackup(b'fncache')
        fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
        if self.entries:
            fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
        fp.close()
        self._dirty = False
    if self.addls:
        # if we have just new entries, let's append them to the fncache
        tr.addbackup(b'fncache')
        fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
        fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
        fp.close()
        self.entries = None
        self.addls = set()
605
607
def add(self, fn):
    """Queue ``fn`` as a pending addition unless it is already known.

    The entries are loaded on demand; a name already present in
    ``self.entries`` is left alone.
    """
    if self.entries is None:
        self._load()
    if fn in self.entries:
        return
    self.addls.add(fn)
611
613
def remove(self, fn):
    """Forget ``fn``, whether it is a pending addition or a loaded entry.

    A pending addition is simply dropped.  Removing a name from the
    loaded entries marks the cache dirty.  An unknown name is ignored.
    """
    if self.entries is None:
        self._load()
    if fn in self.addls:
        self.addls.remove(fn)
        return
    if fn in self.entries:
        self.entries.remove(fn)
        self._dirty = True
623
625
624 def __contains__(self, fn):
626 def __contains__(self, fn):
625 if fn in self.addls:
627 if fn in self.addls:
626 return True
628 return True
627 if self.entries is None:
629 if self.entries is None:
628 self._load()
630 self._load()
629 return fn in self.entries
631 return fn in self.entries
630
632
631 def __iter__(self):
633 def __iter__(self):
632 if self.entries is None:
634 if self.entries is None:
633 self._load()
635 self._load()
634 return iter(self.entries | self.addls)
636 return iter(self.entries | self.addls)
635
637
636
638
class _fncachevfs(vfsmod.proxyvfs):
    """Proxy vfs that encodes paths and registers written files in a fncache."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        """Open ``path`` through the wrapped vfs using its encoded name.

        Non read-only opens of ``data/`` or ``meta/`` paths also add the
        unencoded path to the fncache, unless that can be skipped.
        """
        encoded = self.encode(path)
        if mode not in (b'r', b'rb') and path.startswith(
            (b'data/', b'meta/')
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skipadd = self.fncache.entries is None and self.vfs.exists(encoded)
            if skipadd and b'a' in mode:
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                if not self.vfs.stat(encoded).st_size:
                    skipadd = False
            if not skipadd:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """Join ``path`` on the wrapped vfs, encoding it when non-empty."""
        target = self.encode(path) if path else path
        return self.vfs.join(target)
665
667
666
668
class fncachestore(basicstore):
    """Store that lists its data files in a fncache and encodes their paths."""

    def __init__(self, path, vfstype, dotencode):
        """Set up the raw vfs on ``path + b'/store'`` and the fncache on it.

        ``dotencode`` selects ``_pathencode`` over ``_plainhybridencode``
        as the path encoding function.
        """
        self.encode = _pathencode if dotencode else _plainhybridencode
        rawvfs = vfstype(path + b'/store')
        self.path = rawvfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.fncache = fncache(rawvfs)
        # the public vfs encodes paths and feeds the fncache
        self.vfs = _fncachevfs(rawvfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the store path followed by the encoded form of ``f``."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the size reported by the raw vfs for ``path``."""
        return self.rawvfs.stat(path).st_size

    def datafiles(self, matcher=None):
        """Yield ``(unencoded, encoded, size)`` for each fncache entry.

        Entries are visited in sorted order; those rejected by
        ``_matchtrackedpath`` are skipped, and so are those whose file
        is missing (``ENOENT``).
        """
        for unencoded in sorted(self.fncache):
            if _matchtrackedpath(unencoded, matcher):
                encoded = self.encode(unencoded)
                try:
                    yield unencoded, encoded, self.getsize(encoded)
                except OSError as exc:
                    if exc.errno != errno.ENOENT:
                        raise

    def copylist(self):
        """Return the list of paths to copy, store files prefixed by ``store/``."""
        storefiles = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        paths = [b'requires', b'00changelog.i']
        paths.extend(b'store/' + name for name in storefiles)
        return paths

    def write(self, tr):
        """Write the fncache within transaction ``tr``."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the in-memory fncache state (entries and pending additions)."""
        fnc = self.fncache
        fnc.entries = None
        fnc.addls = set()

    def markremoved(self, fn):
        """Remove ``fn`` from the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f):
        """Tell whether the encoded form of ``f`` can be stat'ed.

        Only ``ENOENT`` counts as "missing"; other ``OSError`` propagate.
        """
        ef = self.encode(f)
        try:
            self.getsize(ef)
        except OSError as exc:
            if exc.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = b"/".join((b"data", path))
        # check for files (exact match)
        index = base + b'.i'
        if index in self.fncache and self._exists(index):
            return True
        # now check for directories (prefix match)
        prefix = base if base.endswith(b'/') else base + b'/'
        return any(
            entry.startswith(prefix) and self._exists(entry)
            for entry in self.fncache
        )
General Comments 0
You need to be logged in to leave comments. Login now