##// END OF EJS Templates
store: only access file_size information through the file object...
marmoute -
r51368:7d4d2a16 default
parent child Browse files
Show More
@@ -1,963 +1,963 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how much bytes should be read from fncache in one read
31 # how much bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _matchtrackedpath(path, matcher):
36 def _matchtrackedpath(path, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 path = decodedir(path)
44 path = decodedir(path)
45 if path.startswith(b'data/'):
45 if path.startswith(b'data/'):
46 return matcher(path[len(b'data/') : -len(b'.i')])
46 return matcher(path[len(b'data/') : -len(b'.i')])
47 elif path.startswith(b'meta/'):
47 elif path.startswith(b'meta/'):
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49
49
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
def decodedir(path):
    """Reverse ``_encodedir``: drop the ``.hg`` escape from components.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        # fast path: nothing was escaped
        return path
    # undo the escapes, in the reverse order of _encodedir
    for escaped, suffix in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(escaped, suffix)
    return path
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
def _buildencodefun():
    """Build the (encode, decode) pair for the reversible filename encoding.

    Uppercase ASCII letters become ``_`` + lowercase letter (and ``_``
    itself doubles as ``__``); every byte from ``_reserved()`` becomes a
    ``~XX`` hex escape.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # start from the identity mapping for every 7-bit byte ...
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # ... hex-escape the reserved bytes ...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ... and map capitals (plus the escape character itself) to _lowercase
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(x)] = escape + xchr(x).lower()

    # the decode map is simply the inverse of the encode map
    dmap = {v: k for k, v in cmap.items()}

    def encode(s):
        return b''.join([cmap[s[c : c + 1]] for c in range(len(s))])

    def decode(s):
        pieces = []
        i = 0
        while i < len(s):
            # encoded units are 1 byte (plain), 2 ('_x') or 3 ('~xx') long
            for width in range(1, 4):
                try:
                    pieces.append(dmap[s[i : i + width]])
                    i += width
                    break
                except KeyError:
                    pass
            else:
                raise KeyError
        return b''.join(pieces)

    return encode, decode
168
168
169
169
170 _encodefname, _decodefname = _buildencodefun()
170 _encodefname, _decodefname = _buildencodefun()
171
171
172
172
def encodefilename(s):
    """Apply the full reversible store encoding to a path.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # escape directory components first, then individual characters
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
179
179
180
180
def decodefilename(s):
    """Reverse ``encodefilename``.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo the per-character escaping, then the directory escaping
    char_decoded = _decodefname(s)
    return decodedir(char_decoded)
187
187
188
188
def _buildlowerencodefun():
    """Build the non-reversible lowercasing filename encoder.

    Reserved bytes become ``~XX`` escapes like in the default encoding,
    but uppercase letters are simply lowercased (which is why the mapping
    cannot be inverted).

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    table = {}
    # identity for plain 7-bit bytes
    for code in range(127):
        table[xchr(code)] = xchr(code)
    # hex-escape the reserved bytes
    for code in _reserved():
        table[xchr(code)] = b"~%02x" % code
    # fold capitals to lowercase
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[xchr(code)] = xchr(code).lower()

    def lowerencode(s):
        return b"".join(table[c] for c in pycompat.iterbytestr(s))

    return lowerencode
212
212
213
213
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215
215
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219
219
220
220
221 def _auxencode(path, dotencode):
221 def _auxencode(path, dotencode):
222 """
222 """
223 Encodes filenames containing names reserved by Windows or which end in
223 Encodes filenames containing names reserved by Windows or which end in
224 period or space. Does not touch other single reserved characters c.
224 period or space. Does not touch other single reserved characters c.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 Additionally encodes space or period at the beginning, if dotencode is
226 Additionally encodes space or period at the beginning, if dotencode is
227 True. Parameter path is assumed to be all lowercase.
227 True. Parameter path is assumed to be all lowercase.
228 A segment only needs encoding if a reserved name appears as a
228 A segment only needs encoding if a reserved name appears as a
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 doesn't need encoding.
230 doesn't need encoding.
231
231
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 >>> _auxencode(s.split(b'/'), True)
233 >>> _auxencode(s.split(b'/'), True)
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 >>> _auxencode(s.split(b'/'), False)
236 >>> _auxencode(s.split(b'/'), False)
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 >>> _auxencode([b'foo. '], True)
238 >>> _auxencode([b'foo. '], True)
239 ['foo.~20']
239 ['foo.~20']
240 >>> _auxencode([b' .foo'], True)
240 >>> _auxencode([b' .foo'], True)
241 ['~20.foo']
241 ['~20.foo']
242 """
242 """
243 for i, n in enumerate(path):
243 for i, n in enumerate(path):
244 if not n:
244 if not n:
245 continue
245 continue
246 if dotencode and n[0] in b'. ':
246 if dotencode and n[0] in b'. ':
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 path[i] = n
248 path[i] = n
249 else:
249 else:
250 l = n.find(b'.')
250 l = n.find(b'.')
251 if l == -1:
251 if l == -1:
252 l = len(n)
252 l = len(n)
253 if (l == 3 and n[:3] in _winres3) or (
253 if (l == 3 and n[:3] in _winres3) or (
254 l == 4
254 l == 4
255 and n[3:4] <= b'9'
255 and n[3:4] <= b'9'
256 and n[3:4] >= b'1'
256 and n[3:4] >= b'1'
257 and n[:3] in _winres4
257 and n[:3] in _winres4
258 ):
258 ):
259 # encode third letter ('aux' -> 'au~78')
259 # encode third letter ('aux' -> 'au~78')
260 ec = b"~%02x" % ord(n[2:3])
260 ec = b"~%02x" % ord(n[2:3])
261 n = n[0:2] + ec + n[3:]
261 n = n[0:2] + ec + n[3:]
262 path[i] = n
262 path[i] = n
263 if n[-1] in b'. ':
263 if n[-1] in b'. ':
264 # encode last period or space ('foo...' -> 'foo..~2e')
264 # encode last period or space ('foo...' -> 'foo..~2e')
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 return path
266 return path
267
267
268
268
# longest on-disk store path we are willing to produce
_maxstorepathlen = 120
# how many characters of each directory level are kept in hashed paths
_dirprefixlen = 8
# total budget for the shortened directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


def _hashencode(path, dotencode):
    """Encode ``path`` with the non-reversible hashed scheme.

    The result is ``dh/`` + up to ``_dirprefixlen`` characters of each
    directory level (capped at ``_maxshortdirslen`` in total) + a filler
    taken from the start of the basename + the sha1 digest of the full
    path + the original extension, everything sized so the whole result
    stays within ``_maxstorepathlen``.
    """
    digest = hex(hashutil.sha1(path).digest())
    # path[5:] skips the b'data/' or b'meta/' prefix
    parts = _auxencode(lowerencode(path[5:]).split(b'/'), dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    shortdirs = []
    shortlen = 0
    for part in parts[:-1]:
        prefix = part[:_dirprefixlen]
        if prefix[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + b'_'
        if shortlen:
            candidate = shortlen + 1 + len(prefix)  # +1 for the b'/'
        else:
            candidate = len(prefix)
        if candidate > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        shortlen = candidate
    dirs = b'/'.join(shortdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with as much of the basename as still fits
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
304
304
305
305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    encoded = encodedir(path)
    components = _encodefname(encoded).split(b'/')
    res = b'/'.join(_auxencode(components, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the filesystem: fall back to the hashed form
        res = _hashencode(encoded, dotencode)
    return res
343
343
344
344
def _pathencode(path):
    """Pure-Python fallback for the C ``pathencode``.

    Equivalent to ``_hybridencode(path, True)``, but short-circuits to the
    hashed encoding when the raw path is already over ``_maxstorepathlen``.
    """
    dir_encoded = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    components = _encodefname(dir_encoded).split(b'/')
    res = b'/'.join(_auxencode(components, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return res
354
354
355
355
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
def _plainhybridencode(f):
    """Hybrid-encode ``f`` without escaping leading periods or spaces."""
    return _hybridencode(f, False)
361
361
362
362
363 def _calcmode(vfs):
363 def _calcmode(vfs):
364 try:
364 try:
365 # files in .hg/ will be created using this mode
365 # files in .hg/ will be created using this mode
366 mode = vfs.stat().st_mode
366 mode = vfs.stat().st_mode
367 # avoid some useless chmods
367 # avoid some useless chmods
368 if (0o777 & ~util.umask) == (0o777 & mode):
368 if (0o777 & ~util.umask) == (0o777 & mode):
369 mode = None
369 mode = None
370 except OSError:
370 except OSError:
371 mode = None
371 mode = None
372 return mode
372 return mode
373
373
374
374
375 _data = [
375 _data = [
376 b'bookmarks',
376 b'bookmarks',
377 b'narrowspec',
377 b'narrowspec',
378 b'data',
378 b'data',
379 b'meta',
379 b'meta',
380 b'00manifest.d',
380 b'00manifest.d',
381 b'00manifest.i',
381 b'00manifest.i',
382 b'00changelog.d',
382 b'00changelog.d',
383 b'00changelog.i',
383 b'00changelog.i',
384 b'phaseroots',
384 b'phaseroots',
385 b'obsstore',
385 b'obsstore',
386 b'requires',
386 b'requires',
387 ]
387 ]
388
388
# suffix of a revlog's main entry point
REVLOG_FILES_MAIN_EXT = (b'.i',)
# suffixes of auxiliary revlog files (data, sidedata, nodemap, ...)
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


def is_revlog(f, kind, st):
    """Classify directory entry ``f`` as a revlog file, or return None.

    Only regular files (``kind == stat.S_IFREG``) can be revlogs; the
    actual classification is delegated to ``revlog_type``.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)


def revlog_type(f):
    """Return the FILETYPE_*/FILEFLAGS_* classification of filename ``f``.

    Returns None when ``f`` does not look like a revlog file at all.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None


# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
454
454
455
455
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store.

    The on-disk size is kept private (``_file_size``); external code should
    only see it through the StoreFile objects returned by ``files()``.
    """

    unencoded_path = attr.ib()
    is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self.is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """Return the StoreFile instances backing this entry."""
        # StoreFile attribute order: unencoded_path, file_size, is_volatile
        return [
            StoreFile(
                self.unencoded_path,
                self._file_size,
                self.is_volatile,
            )
        ]
484
484
485
485
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic, non-revlog entry in the store"""

    # class-level marker distinguishing plain entries from revlog entries
    is_revlog = False
491
491
492
492
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* categories (changelog/manifestlog/filelog)
    revlog_type = attr.ib(default=None)
    # True when this entry is the main (".i") file of its revlog
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
516
516
517
517
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # path before the store's filename encoding is applied
    unencoded_path = attr.ib()
    # size on disk, possibly None when unknown
    file_size = attr.ib()
    # volatile files may change/disappear between listing and streaming
    is_volatile = attr.ib(default=False)
525
525
526
526
527 class basicstore:
527 class basicstore:
528 '''base class for local repository stores'''
528 '''base class for local repository stores'''
529
529
530 def __init__(self, path, vfstype):
530 def __init__(self, path, vfstype):
531 vfs = vfstype(path)
531 vfs = vfstype(path)
532 self.path = vfs.base
532 self.path = vfs.base
533 self.createmode = _calcmode(vfs)
533 self.createmode = _calcmode(vfs)
534 vfs.createmode = self.createmode
534 vfs.createmode = self.createmode
535 self.rawvfs = vfs
535 self.rawvfs = vfs
536 self.vfs = vfsmod.filtervfs(vfs, encodedir)
536 self.vfs = vfsmod.filtervfs(vfs, encodedir)
537 self.opener = self.vfs
537 self.opener = self.vfs
538
538
539 def join(self, f):
539 def join(self, f):
540 return self.path + b'/' + encodedir(f)
540 return self.path + b'/' + encodedir(f)
541
541
542 def _walk(self, relpath, recurse):
542 def _walk(self, relpath, recurse):
543 '''yields (revlog_type, unencoded, size)'''
543 '''yields (revlog_type, unencoded, size)'''
544 path = self.path
544 path = self.path
545 if relpath:
545 if relpath:
546 path += b'/' + relpath
546 path += b'/' + relpath
547 striplen = len(self.path) + 1
547 striplen = len(self.path) + 1
548 l = []
548 l = []
549 if self.rawvfs.isdir(path):
549 if self.rawvfs.isdir(path):
550 visit = [path]
550 visit = [path]
551 readdir = self.rawvfs.readdir
551 readdir = self.rawvfs.readdir
552 while visit:
552 while visit:
553 p = visit.pop()
553 p = visit.pop()
554 for f, kind, st in readdir(p, stat=True):
554 for f, kind, st in readdir(p, stat=True):
555 fp = p + b'/' + f
555 fp = p + b'/' + f
556 rl_type = is_revlog(f, kind, st)
556 rl_type = is_revlog(f, kind, st)
557 if rl_type is not None:
557 if rl_type is not None:
558 n = util.pconvert(fp[striplen:])
558 n = util.pconvert(fp[striplen:])
559 l.append((rl_type, decodedir(n), st.st_size))
559 l.append((rl_type, decodedir(n), st.st_size))
560 elif kind == stat.S_IFDIR and recurse:
560 elif kind == stat.S_IFDIR and recurse:
561 visit.append(fp)
561 visit.append(fp)
562 l.sort()
562 l.sort()
563 return l
563 return l
564
564
565 def changelog(self, trypending, concurrencychecker=None):
565 def changelog(self, trypending, concurrencychecker=None):
566 return changelog.changelog(
566 return changelog.changelog(
567 self.vfs,
567 self.vfs,
568 trypending=trypending,
568 trypending=trypending,
569 concurrencychecker=concurrencychecker,
569 concurrencychecker=concurrencychecker,
570 )
570 )
571
571
572 def manifestlog(self, repo, storenarrowmatch):
572 def manifestlog(self, repo, storenarrowmatch):
573 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
573 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
574 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
574 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
575
575
576 def datafiles(
576 def datafiles(
577 self, matcher=None, undecodable=None
577 self, matcher=None, undecodable=None
578 ) -> Generator[BaseStoreEntry, None, None]:
578 ) -> Generator[BaseStoreEntry, None, None]:
579 """Like walk, but excluding the changelog and root manifest.
579 """Like walk, but excluding the changelog and root manifest.
580
580
581 When [undecodable] is None, revlogs names that can't be
581 When [undecodable] is None, revlogs names that can't be
582 decoded cause an exception. When it is provided, it should
582 decoded cause an exception. When it is provided, it should
583 be a list and the filenames that can't be decoded are added
583 be a list and the filenames that can't be decoded are added
584 to it instead. This is very rarely needed."""
584 to it instead. This is very rarely needed."""
585 files = self._walk(b'data', True) + self._walk(b'meta', True)
585 files = self._walk(b'data', True) + self._walk(b'meta', True)
586 for (t, u, s) in files:
586 for (t, u, s) in files:
587 if t is not None:
587 if t is not None:
588 yield RevlogStoreEntry(
588 yield RevlogStoreEntry(
589 unencoded_path=u,
589 unencoded_path=u,
590 revlog_type=FILEFLAGS_FILELOG,
590 revlog_type=FILEFLAGS_FILELOG,
591 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
591 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
592 is_volatile=bool(t & FILEFLAGS_VOLATILE),
592 is_volatile=bool(t & FILEFLAGS_VOLATILE),
593 file_size=s,
593 file_size=s,
594 )
594 )
595
595
596 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
596 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
597 # yield manifest before changelog
597 # yield manifest before changelog
598 files = reversed(self._walk(b'', False))
598 files = reversed(self._walk(b'', False))
599 for (t, u, s) in files:
599 for (t, u, s) in files:
600 if u.startswith(b'00changelog'):
600 if u.startswith(b'00changelog'):
601 yield RevlogStoreEntry(
601 yield RevlogStoreEntry(
602 unencoded_path=u,
602 unencoded_path=u,
603 revlog_type=FILEFLAGS_CHANGELOG,
603 revlog_type=FILEFLAGS_CHANGELOG,
604 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
604 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
605 is_volatile=bool(t & FILEFLAGS_VOLATILE),
605 is_volatile=bool(t & FILEFLAGS_VOLATILE),
606 file_size=s,
606 file_size=s,
607 )
607 )
608 elif u.startswith(b'00manifest'):
608 elif u.startswith(b'00manifest'):
609 yield RevlogStoreEntry(
609 yield RevlogStoreEntry(
610 unencoded_path=u,
610 unencoded_path=u,
611 revlog_type=FILEFLAGS_MANIFESTLOG,
611 revlog_type=FILEFLAGS_MANIFESTLOG,
612 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
612 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
613 is_volatile=bool(t & FILEFLAGS_VOLATILE),
613 is_volatile=bool(t & FILEFLAGS_VOLATILE),
614 file_size=s,
614 file_size=s,
615 )
615 )
616 else:
616 else:
617 yield SimpleStoreEntry(
617 yield SimpleStoreEntry(
618 unencoded_path=u,
618 unencoded_path=u,
619 is_volatile=bool(t & FILEFLAGS_VOLATILE),
619 is_volatile=bool(t & FILEFLAGS_VOLATILE),
620 file_size=s,
620 file_size=s,
621 )
621 )
622
622
623 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
623 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
624 """return files related to data storage (ie: revlogs)
624 """return files related to data storage (ie: revlogs)
625
625
626 yields (file_type, unencoded, size)
626 yields (file_type, unencoded, size)
627
627
628 if a matcher is passed, storage files of only those tracked paths
628 if a matcher is passed, storage files of only those tracked paths
629 are passed with matches the matcher
629 are passed with matches the matcher
630 """
630 """
631 # yield data files first
631 # yield data files first
632 for x in self.datafiles(matcher):
632 for x in self.datafiles(matcher):
633 yield x
633 yield x
634 for x in self.topfiles():
634 for x in self.topfiles():
635 yield x
635 yield x
636
636
637 def copylist(self):
637 def copylist(self):
638 return _data
638 return _data
639
639
640 def write(self, tr):
640 def write(self, tr):
641 pass
641 pass
642
642
643 def invalidatecaches(self):
643 def invalidatecaches(self):
644 pass
644 pass
645
645
646 def markremoved(self, fn):
646 def markremoved(self, fn):
647 pass
647 pass
648
648
649 def __contains__(self, path):
649 def __contains__(self, path):
650 '''Checks if the store contains path'''
650 '''Checks if the store contains path'''
651 path = b"/".join((b"data", path))
651 path = b"/".join((b"data", path))
652 # file?
652 # file?
653 if self.vfs.exists(path + b".i"):
653 if self.vfs.exists(path + b".i"):
654 return True
654 return True
655 # dir?
655 # dir?
656 if not path.endswith(b"/"):
656 if not path.endswith(b"/"):
657 path = path + b"/"
657 path = path + b"/"
658 return self.vfs.exists(path)
658 return self.vfs.exists(path)
659
659
660
660
class encodedstore(basicstore):
    """A store whose on-disk filenames are filesystem-safe encodings of the
    logical names (see ``encodefilename``/``decodefilename``)."""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # all accesses through self.vfs transparently encode the filename
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield decoded data-file entries, filtered by ``matcher``.

        Names that cannot be decoded either raise a StorageError or, when
        ``undecodable`` is a list, are appended to it and skipped.
        """
        for entry in super().datafiles():
            encoded_name = entry.unencoded_path
            try:
                decoded_name = decodefilename(encoded_name)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % encoded_name
                    raise error.StorageError(msg)
                else:
                    undecodable.append(encoded_name)
                    continue
            if not _matchtrackedpath(decoded_name, matcher):
                continue
            entry.unencoded_path = decoded_name
            yield entry

    def join(self, f):
        """Return the encoded on-disk path for logical name ``f``."""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """Return the store-relative paths to duplicate when copying."""
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
700
700
701
701
class fncache:
    """In-memory handle on the ``fncache`` store file.

    The on-disk file holds one store filename per line.  Loading is lazy
    (``entries`` stays None until first needed); new names are buffered in
    ``addls`` and flushed to disk by ``write()`` during a transaction.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names that add() must silently skip (see addignore())
        self._ignores = set()
        # set of known entries, or None while the file is still unread
        self.entries = None
        # True when `entries` diverged from the on-disk file (a removal
        # happened), forcing a full rewrite in write()
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks so a huge fncache need not fit in memory
        # at once (see fncache_chunksize)
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                # keep the partial trailing line for the next iteration
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks a final newline, i.e. it
            # is truncated or corrupted
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Flush in-memory state to disk within transaction ``tr``.

        A full rewrite happens only when entries were removed (``_dirty``);
        pure additions are appended to the existing file instead.
        """
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access so memory matches disk
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Mark ``fn`` so that subsequent add() calls skip it."""
        self._ignores.add(fn)

    def add(self, fn):
        """Record ``fn`` as a new entry (buffered until write())."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``; marks the cache dirty when it was persisted."""
        if self.entries is None:
            self._load()
        # a name still pending addition can simply be dropped from the
        # buffer without dirtying the persisted set
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        # check the pending additions first to avoid a load when possible
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        # iterate over persisted entries and pending additions alike
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
826
826
827
827
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records newly written revlog files in the fncache."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        # the fncache instance to register new filenames with
        self.fncache = fnc
        # path-encoding function applied before touching the wrapped vfs
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        # register revlog files under data/ or meta/ in the fncache when
        # they are opened for writing, then delegate to the wrapped vfs
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """Return the real (encoded) path for ``path`` on the wrapped vfs."""
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
863
863
864
864
class fncachestore(basicstore):
    """Store variant that tracks its filenames in an ``fncache`` file."""

    def __init__(self, path, vfstype, dotencode):
        # pick the path-encoding scheme requested by the repo format
        self.encode = _pathencode if dotencode else _plainhybridencode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.fncache = fncache(vfs)
        # all regular accesses go through the fncache-registering proxy
        self.vfs = _fncachevfs(vfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the encoded on-disk path for logical name ``f``."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the size of the (already encoded) ``path``."""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield a store entry for every tracked revlog file listed in the
        fncache, filtered by ``matcher``."""
        for unencoded in sorted(self.fncache):
            if not _matchtrackedpath(unencoded, matcher):
                continue
            encoded = self.encode(unencoded)
            flags = revlog_type(unencoded)
            if flags is None:
                # Note: this should not be in the fncache then…
                #
                # However the fncache might contains such file added by
                # previous version of Mercurial.
                continue
            try:
                yield RevlogStoreEntry(
                    unencoded_path=unencoded,
                    revlog_type=FILEFLAGS_FILELOG,
                    is_revlog_main=bool(flags & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(flags & FILEFLAGS_VOLATILE),
                    file_size=self.getsize(encoded),
                )
            except FileNotFoundError:
                # the file vanished between listing and stat'ing; skip it
                pass

    def copylist(self):
        """Return the store-relative paths to duplicate when copying."""
        store_files = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        result = [b'requires', b'00changelog.i']
        result.extend(b'store/' + f for f in store_files)
        return result

    def write(self, tr):
        """Persist pending fncache changes within transaction ``tr``."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """Drop the in-memory fncache state so it reloads from disk."""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Forget ``fn`` from the fncache."""
        self.fncache.remove(fn)

    def _exists(self, f):
        # stat (via getsize) doubles as the cheapest existence probe
        try:
            self.getsize(self.encode(f))
        except FileNotFoundError:
            return False
        return True

    def __contains__(self, path):
        """Check whether the store tracks ``path`` (file or directory)."""
        path = b"/".join((b"data", path))
        # check for files (exact match on the revlog index)
        index_file = path + b'.i'
        if index_file in self.fncache and self._exists(index_file):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        return any(
            e.startswith(path) and self._exists(e) for e in self.fncache
        )
General Comments 0
You need to be logged in to leave comments. Login now