##// END OF EJS Templates
store: change `_walk` return to `(filename, (type, size))`...
marmoute -
r51371:1c0244a8 default
parent child Browse files
Show More
@@ -1,967 +1,971 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
parsers = policy.importmod('parsers')

# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34
34
35
35
def _matchtrackedpath(path, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    path = decodedir(path)
    if path.startswith(b'data/'):
        # filelog entry: strip the 'data/' prefix and the '.i' suffix
        return matcher(path[len(b'data/') : -len(b'.i')])
    elif path.startswith(b'meta/'):
        # tree-manifest entry: the directory is what the matcher knows about
        return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])

    raise error.ProgrammingError(b"cannot decode path %s" % path)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
# prefer the C implementation when available
encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was dir-encoded
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encode map: identity for plain ascii, '~%02x' for reserved bytes,
    # '_x' for capitals (and '__' for the escape character itself)
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decode map: inverse of cmap
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1, 2 ('_x') or 3 ('~xx') bytes long
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
168
168
169
169
_encodefname, _decodefname = _buildencodefun()
171
171
172
172
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))
179
179
180
180
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))
187
187
188
188
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # unlike _buildencodefun, capitals are simply lowercased (not reversible)
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212
212
213
213
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
219
219
220
220
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # NOTE: mutates and returns the input list in place
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
267
267
268
268
269 _maxstorepathlen = 120
269 _maxstorepathlen = 120
270 _dirprefixlen = 8
270 _dirprefixlen = 8
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272
272
273
273
def _hashencode(path, dotencode):
    """hashed ("dh/") encoding of `path` — not reversible

    Keeps up to _dirprefixlen chars of each directory level (capped at
    _maxshortdirslen total), then as much of the basename as fits, then the
    sha1 digest of the full path, preserving the extension.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304
304
305
305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
343
343
344
344
def _pathencode(path):
    """dotencode variant of _hybridencode, used for the 'store' requirement"""
    de = encodedir(path)
    # a path longer than the limit will hash anyway; skip the default encoding
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
354
354
355
355
# prefer the C implementation when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
def _plainhybridencode(f):
    """non-dotencode variant of _hybridencode"""
    return _hybridencode(f, False)
361
361
362
362
def _calcmode(vfs):
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
373
373
374
374
375 _data = [
375 _data = [
376 b'bookmarks',
376 b'bookmarks',
377 b'narrowspec',
377 b'narrowspec',
378 b'data',
378 b'data',
379 b'meta',
379 b'meta',
380 b'00manifest.d',
380 b'00manifest.d',
381 b'00manifest.i',
381 b'00manifest.i',
382 b'00changelog.d',
382 b'00changelog.d',
383 b'00changelog.i',
383 b'00changelog.i',
384 b'phaseroots',
384 b'phaseroots',
385 b'obsstore',
385 b'obsstore',
386 b'requires',
386 b'requires',
387 ]
387 ]
388
388
REVLOG_FILES_MAIN_EXT = (b'.i',)
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408
408
409
409
def is_revlog(f, kind, st):
    """return the revlog type flags for `f`, or None for non-revlog files

    Only regular files can be revlogs; directories and symlinks never are.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
414
414
415
415
def revlog_type(f):
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None
428
428
429
429
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
454
454
455
455
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    unencoded_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the StoreFile(s) backing this entry

        The base entry maps to exactly one file; subclasses may differ."""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
484
484
485
485
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False
491
491
492
492
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* category constants (changelog/manifest/filelog)
    revlog_type = attr.ib(default=None)
    # True for the ".i" main file, False for secondary files
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
516
516
517
517
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # NOTE(review): default is False but file_size() tests `is not None`, so a
    # default-constructed instance reports a size of False (== 0) without ever
    # stat-ing — looks intentional as "unknown yet cheap", but worth confirming
    _file_size = attr.ib(default=False)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return the size of this file, stat-ing through `vfs` if unknown

        A vanished file (e.g. a volatile one deleted since listing) counts
        as size 0 rather than raising."""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
533
533
534
534
535 class basicstore:
535 class basicstore:
536 '''base class for local repository stores'''
536 '''base class for local repository stores'''
537
537
538 def __init__(self, path, vfstype):
538 def __init__(self, path, vfstype):
539 vfs = vfstype(path)
539 vfs = vfstype(path)
540 self.path = vfs.base
540 self.path = vfs.base
541 self.createmode = _calcmode(vfs)
541 self.createmode = _calcmode(vfs)
542 vfs.createmode = self.createmode
542 vfs.createmode = self.createmode
543 self.rawvfs = vfs
543 self.rawvfs = vfs
544 self.vfs = vfsmod.filtervfs(vfs, encodedir)
544 self.vfs = vfsmod.filtervfs(vfs, encodedir)
545 self.opener = self.vfs
545 self.opener = self.vfs
546
546
547 def join(self, f):
547 def join(self, f):
548 return self.path + b'/' + encodedir(f)
548 return self.path + b'/' + encodedir(f)
549
549
550 def _walk(self, relpath, recurse):
550 def _walk(self, relpath, recurse):
551 '''yields (revlog_type, unencoded, size)'''
551 '''yields (revlog_type, unencoded, size)'''
552 path = self.path
552 path = self.path
553 if relpath:
553 if relpath:
554 path += b'/' + relpath
554 path += b'/' + relpath
555 striplen = len(self.path) + 1
555 striplen = len(self.path) + 1
556 l = []
556 l = []
557 if self.rawvfs.isdir(path):
557 if self.rawvfs.isdir(path):
558 visit = [path]
558 visit = [path]
559 readdir = self.rawvfs.readdir
559 readdir = self.rawvfs.readdir
560 while visit:
560 while visit:
561 p = visit.pop()
561 p = visit.pop()
562 for f, kind, st in readdir(p, stat=True):
562 for f, kind, st in readdir(p, stat=True):
563 fp = p + b'/' + f
563 fp = p + b'/' + f
564 rl_type = is_revlog(f, kind, st)
564 rl_type = is_revlog(f, kind, st)
565 if rl_type is not None:
565 if rl_type is not None:
566 n = util.pconvert(fp[striplen:])
566 n = util.pconvert(fp[striplen:])
567 l.append((rl_type, decodedir(n), st.st_size))
567 l.append((decodedir(n), (rl_type, st.st_size)))
568 elif kind == stat.S_IFDIR and recurse:
568 elif kind == stat.S_IFDIR and recurse:
569 visit.append(fp)
569 visit.append(fp)
570
570 l.sort()
571 l.sort()
571 return l
572 return l
572
573
573 def changelog(self, trypending, concurrencychecker=None):
574 def changelog(self, trypending, concurrencychecker=None):
574 return changelog.changelog(
575 return changelog.changelog(
575 self.vfs,
576 self.vfs,
576 trypending=trypending,
577 trypending=trypending,
577 concurrencychecker=concurrencychecker,
578 concurrencychecker=concurrencychecker,
578 )
579 )
579
580
580 def manifestlog(self, repo, storenarrowmatch):
581 def manifestlog(self, repo, storenarrowmatch):
581 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
582 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
582 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
583 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
583
584
584 def datafiles(
585 def datafiles(
585 self, matcher=None, undecodable=None
586 self, matcher=None, undecodable=None
586 ) -> Generator[BaseStoreEntry, None, None]:
587 ) -> Generator[BaseStoreEntry, None, None]:
587 """Like walk, but excluding the changelog and root manifest.
588 """Like walk, but excluding the changelog and root manifest.
588
589
589 When [undecodable] is None, revlogs names that can't be
590 When [undecodable] is None, revlogs names that can't be
590 decoded cause an exception. When it is provided, it should
591 decoded cause an exception. When it is provided, it should
591 be a list and the filenames that can't be decoded are added
592 be a list and the filenames that can't be decoded are added
592 to it instead. This is very rarely needed."""
593 to it instead. This is very rarely needed."""
593 files = self._walk(b'data', True) + self._walk(b'meta', True)
594 files = self._walk(b'data', True) + self._walk(b'meta', True)
594 for (t, u, s) in files:
595 for u, (t, s) in files:
595 if t is not None:
596 if t is not None:
596 yield RevlogStoreEntry(
597 yield RevlogStoreEntry(
597 unencoded_path=u,
598 unencoded_path=u,
598 revlog_type=FILEFLAGS_FILELOG,
599 revlog_type=FILEFLAGS_FILELOG,
599 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
600 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
600 is_volatile=bool(t & FILEFLAGS_VOLATILE),
601 is_volatile=bool(t & FILEFLAGS_VOLATILE),
601 file_size=s,
602 file_size=s,
602 )
603 )
603
604
604 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
605 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
605 # yield manifest before changelog
606 # yield manifest before changelog
606 files = reversed(self._walk(b'', False))
607 files = self._walk(b'', False)
607 for (t, u, s) in files:
608 # key is (type, path) (keeping ordering so we get 00changelog.i last)
609 type_key = lambda x: (x[1][0], x[0])
610 files = sorted(files, reverse=True, key=type_key)
611 for u, (t, s) in files:
608 if u.startswith(b'00changelog'):
612 if u.startswith(b'00changelog'):
609 yield RevlogStoreEntry(
613 yield RevlogStoreEntry(
610 unencoded_path=u,
614 unencoded_path=u,
611 revlog_type=FILEFLAGS_CHANGELOG,
615 revlog_type=FILEFLAGS_CHANGELOG,
612 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
616 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
613 is_volatile=bool(t & FILEFLAGS_VOLATILE),
617 is_volatile=bool(t & FILEFLAGS_VOLATILE),
614 file_size=s,
618 file_size=s,
615 )
619 )
616 elif u.startswith(b'00manifest'):
620 elif u.startswith(b'00manifest'):
617 yield RevlogStoreEntry(
621 yield RevlogStoreEntry(
618 unencoded_path=u,
622 unencoded_path=u,
619 revlog_type=FILEFLAGS_MANIFESTLOG,
623 revlog_type=FILEFLAGS_MANIFESTLOG,
620 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
624 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
621 is_volatile=bool(t & FILEFLAGS_VOLATILE),
625 is_volatile=bool(t & FILEFLAGS_VOLATILE),
622 file_size=s,
626 file_size=s,
623 )
627 )
624 else:
628 else:
625 yield SimpleStoreEntry(
629 yield SimpleStoreEntry(
626 unencoded_path=u,
630 unencoded_path=u,
627 is_volatile=bool(t & FILEFLAGS_VOLATILE),
631 is_volatile=bool(t & FILEFLAGS_VOLATILE),
628 file_size=s,
632 file_size=s,
629 )
633 )
630
634
631 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
635 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
632 """return files related to data storage (ie: revlogs)
636 """return files related to data storage (ie: revlogs)
633
637
634 yields (file_type, unencoded, size)
638 yields (file_type, unencoded, size)
635
639
636 if a matcher is passed, storage files of only those tracked paths
640 if a matcher is passed, storage files of only those tracked paths
637 are passed with matches the matcher
641 are passed with matches the matcher
638 """
642 """
639 # yield data files first
643 # yield data files first
640 for x in self.datafiles(matcher):
644 for x in self.datafiles(matcher):
641 yield x
645 yield x
642 for x in self.topfiles():
646 for x in self.topfiles():
643 yield x
647 yield x
644
648
645 def copylist(self):
649 def copylist(self):
646 return _data
650 return _data
647
651
648 def write(self, tr):
652 def write(self, tr):
649 pass
653 pass
650
654
651 def invalidatecaches(self):
655 def invalidatecaches(self):
652 pass
656 pass
653
657
654 def markremoved(self, fn):
658 def markremoved(self, fn):
655 pass
659 pass
656
660
657 def __contains__(self, path):
661 def __contains__(self, path):
658 '''Checks if the store contains path'''
662 '''Checks if the store contains path'''
659 path = b"/".join((b"data", path))
663 path = b"/".join((b"data", path))
660 # file?
664 # file?
661 if self.vfs.exists(path + b".i"):
665 if self.vfs.exists(path + b".i"):
662 return True
666 return True
663 # dir?
667 # dir?
664 if not path.endswith(b"/"):
668 if not path.endswith(b"/"):
665 path = path + b"/"
669 path = path + b"/"
666 return self.vfs.exists(path)
670 return self.vfs.exists(path)
667
671
668
672
class encodedstore(basicstore):
    """A store whose filenames under ``data/`` are filename-encoded."""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield data entries with their paths decoded.

        Undecodable names either raise a StorageError or, when the
        ``undecodable`` list is provided, are appended to it and skipped.
        """
        for entry in super().datafiles():
            raw = entry.unencoded_path
            try:
                decoded = decodefilename(raw)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % raw
                    raise error.StorageError(msg)
                undecodable.append(raw)
                continue
            if not _matchtrackedpath(decoded, matcher):
                continue
            entry.unencoded_path = decoded
            yield entry

    def join(self, f):
        """Return the on-disk path for ``f``, filename-encoded."""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """Return the paths to copy when cloning this store locally."""
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
708
712
709
713
class fncache:
    """Track the set of store filenames, persisted in the ``fncache`` file.

    The filenames used to be stored partially encoded, hence the
    encodedir/decodedir dance on read and write.
    """

    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        # ``None`` means "not yet loaded from disk"
        self.entries = None
        self._dirty = False
        # new additions not yet merged into ``entries``
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        """Fill ``entries`` from the on-disk fncache file."""
        self._dirty = False
        try:
            fh = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # no fncache file yet: start out empty
            self.entries = set()
            return

        self.entries = set()
        pending = b''
        # read in bounded chunks to avoid loading a huge fncache at once
        while True:
            data = fh.read(fncache_chunksize)
            if not data:
                break
            pending += data
            # decode only up to the last complete line; a single entry may
            # be larger than the chunk size, so keep accumulating otherwise
            last_nl = pending.rfind(b'\n')
            if last_nl >= 0:
                self.entries.update(
                    decodedir(pending[: last_nl + 1]).splitlines()
                )
                pending = pending[last_nl + 1 :]

        if pending:
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fh, warn)
        fh.close()

    def _checkentries(self, fp, warn):
        """Warn or abort if any loaded entry is the empty string."""
        if b'' in self.entries:
            fp.seek(0)
            for lineno, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (lineno + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Persist pending changes within transaction ``tr``.

        A dirty cache (removals) forces a full rewrite; pure additions are
        simply appended to the existing file.
        """
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fh = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fh.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fh.close()
            self._dirty = False
        if self.addls:
            # only new entries: append them instead of rewriting everything
            tr.addbackup(b'fncache')
            fh = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            fh.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fh.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Mark ``fn`` so that future ``add`` calls for it are no-ops."""
        self._ignores.add(fn)

    def add(self, fn):
        """Register ``fn`` unless it is ignored or already known."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``, marking the cache dirty if it was persisted."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        if fn in self.entries:
            self.entries.remove(fn)
            self._dirty = True

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
834
838
835
839
class _fncachevfs(vfsmod.proxyvfs):
    """A vfs proxy that records newly written store files in the fncache."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        writing = mode not in (b'r', b'rb')
        if (
            writing
            and path.startswith((b'data/', b'meta/'))
            and revlog_type(path) is not None
        ):
            # avoid triggering an fncache load just to register a file
            # that is already known to exist on disk
            skip_add = self.fncache.entries is None and self.vfs.exists(
                encoded
            )
            if (
                skip_add
                and b'r+' in mode
                and not self.vfs.stat(encoded).st_size
            ):
                # when appending, a zero-size file counts as missing: such
                # files result from truncation when a transaction aborted
                skip_add = False
            if not skip_add:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        return self.vfs.join(self.encode(path) if path else path)

    def register_file(self, path):
        """Hook point letting the fncache learn about files created by
        other means than ``__call__``."""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
871
875
872
876
class fncachestore(basicstore):
    """Store using hybrid filename encoding plus an fncache listing file."""

    def __init__(self, path, vfstype, dotencode):
        # dotencode additionally escapes leading '.' / ' ' in components
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the on-disk path for ``f``, hybrid-encoded."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the size in bytes of the (already encoded) ``path``."""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield a store entry for each tracked revlog file in the fncache.

        ``undecodable`` is accepted for interface compatibility; fncache
        entries are stored decoded so nothing is ever appended to it.
        """
        for f in sorted(self.fncache):
            if not _matchtrackedpath(f, matcher):
                continue
            # Note: the original code also computed ``self.encode(f)`` here
            # and discarded the result; the path encoders are pure functions,
            # so the dead call has been dropped.
            t = revlog_type(f)
            if t is None:
                # Note: this should not be in the fncache then…
                #
                # However the fncache might contains such file added by
                # previous version of Mercurial.
                continue
            yield RevlogStoreEntry(
                unencoded_path=f,
                revlog_type=FILEFLAGS_FILELOG,
                is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                is_volatile=bool(t & FILEFLAGS_VOLATILE),
            )

    def copylist(self):
        """Return the paths to copy when cloning this store locally."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        """Flush pending fncache changes within transaction ``tr``."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the in-memory fncache state so it is reloaded from disk."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Remove ``fn`` from the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f):
        """Return True if the encoded form of ``f`` exists on disk."""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now