##// END OF EJS Templates
store: split the wrapping of encodedstore between _wrap and datafiles...
marmoute -
r51386:816e8bc6 default
parent child Browse files
Show More
@@ -1,1058 +1,1058
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
# the 'parsers' module, as resolved by policy.importmod
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34
34
35
35
def _matchtrackedpath(path, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True.

    Entries under ``data/`` are passed to ``matcher`` itself after stripping
    the ``data/`` prefix and the two trailing bytes (``.i``); entries under
    ``meta/`` are passed to ``matcher.visitdir`` after stripping the ``meta/``
    prefix and the trailing ``/00manifest.i``.

    Raises error.ProgrammingError for any other prefix."""

    if matcher is None:
        return True
    # fncache entries are stored dir-encoded; undo that before matching
    path = decodedir(path)
    if path.startswith(b'data/'):
        return matcher(path[len(b'data/') : -len(b'.i')])
    elif path.startswith(b'meta/'):
        return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])

    raise error.ProgrammingError(b"cannot decode path %s" % path)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
def _encodedir(path):
    """Escape directory names ending in ``.hg``, ``.i`` or ``.d``.

    A ``.hg`` suffix is appended to every such directory component so that
    it cannot collide with a store file of the same name.

    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    """
    # ".hg/" must be handled first: the two later replacements introduce
    # new ".hg/" sequences that must not be escaped a second time.
    return (
        path.replace(b".hg/", b".hg.hg/")
        .replace(b".i/", b".i.hg/")
        .replace(b".d/", b".d.hg/")
    )
71
71
72
72
# use the implementation from `parsers` when it provides one, otherwise fall
# back to the pure Python version above
encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
def decodedir(path):
    """Reverse the directory escaping: drop one ``.hg`` suffix from every
    directory component ending in ``.d.hg``, ``.i.hg`` or ``.hg.hg``.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing to unescape
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )
92
92
93
93
def _reserved():
    """characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions

    Yields the byte values as integers, in the order listed above.
    """
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    yield from range(32)
    yield from range(126, 256)
    yield from winreserved
110
110
111
111
def _buildencodefun():
    """Build and return the ``(encode, decode)`` pair for store filenames.

    Reserved bytes (see ``_reserved``) become ``~xx``; uppercase ASCII
    letters and the underscore become ``_`` followed by the lowercased byte.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # identity mapping first, then override the bytes that need escaping
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse mapping: encoded sequence -> original byte
    dmap = {v: k for k, v in cmap.items()}

    def decode(s):
        i = 0
        while i < len(s):
            # encoded sequences are 1 ('a'), 2 ('_a') or 3 ('~xx') bytes long
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no known sequence starts at position i
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
168
168
169
169
# module-level encode/decode pair, built once at import time
_encodefname, _decodefname = _buildencodefun()
171
171
172
172
def encodefilename(s):
    """Apply ``encodedir`` and then the per-byte filename encoding to ``s``.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))
179
179
180
180
def decodefilename(s):
    """Undo the per-byte filename encoding of ``s`` and then ``decodedir`` it.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))
187
187
188
188
def _buildlowerencodefun():
    """Build and return a function that lowercases ASCII capitals and
    escapes reserved bytes (see ``_reserved``) as ``~xx``.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity mapping first, then override reserved bytes and capitals
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212
212
213
213
# use the implementation from `parsers` when it provides one, otherwise
# build the pure Python version
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
219
219
220
220
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    ``path`` is a list of path segments; it is modified in place and also
    returned.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            # empty segment: nothing to encode
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # l is the length of the part before the first period
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4 and b'1' <= n[3:4] <= b'9' and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
267
267
268
268
# longest encoded store path produced before switching to hashed encoding
_maxstorepathlen = 120
# number of leading bytes kept from each directory level in hashed encoding
_dirprefixlen = 8
# upper bound on the combined length of the shortened directory levels
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272
272
273
273
def _hashencode(path, dotencode):
    """Return the non-reversible hashed encoding of ``path``.

    The result is ``dh/`` followed by shortened directory levels, a filler
    taken from the start of the basename, the hex sha1 digest of ``path``
    and the basename's extension. ``dotencode`` is forwarded to
    ``_auxencode``.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            # account for the '/' joining this level to the previous ones
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # use the remaining room for as much of the basename as fits
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304
304
305
305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the reversible form: fall back to hashed encoding
        res = _hashencode(path, dotencode)
    return res
343
343
344
344
def _pathencode(path):
    """Encode a store path, always with dotencode enabled.

    Uses the reversible encoding unless either the raw ``path`` or its
    reversible encoding is longer than ``_maxstorepathlen``, in which case
    the hashed encoding of the dir-encoded path is returned.
    """
    de = encodedir(path)
    # the length test here is on the raw path, before any encoding
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
354
354
355
355
# use the implementation from `parsers` when it provides one, otherwise keep
# the pure Python version above
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
def _plainhybridencode(f):
    """Hybrid-encode ``f`` with dotencode disabled."""
    return _hybridencode(f, False)
361
361
362
362
def _calcmode(vfs):
    """Return the mode reported by ``vfs.stat()``, or None.

    None is returned when the permission bits of that mode already equal
    what ``util.umask`` would produce, or when ``vfs.stat()`` raises
    OSError.
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
373
373
374
374
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

# extension of the main file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the secondary files of a revlog
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415
415
416
416
def is_revlog(f, kind, st):
    """Return the revlog type flags for ``f``, or None.

    None is returned when ``kind`` is not ``stat.S_IFREG``; otherwise the
    result of ``revlog_type(f)`` is returned. ``st`` is accepted but not
    used here.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
421
421
422
422
def revlog_type(f):
    """Classify ``f`` by its extension.

    Returns FILEFLAGS_REVLOG_MAIN for a main revlog extension,
    FILETYPE_FILELOG_OTHER (with FILEFLAGS_VOLATILE added for volatile
    extensions) for a secondary revlog extension, and None otherwise.
    """
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None
435
435
436
436
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# combinations of a "which revlog" flag with a "main/other file" flag
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
461
461
462
462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # path of the entry, before store encoding
    unencoded_path = attr.ib()
    # whether the entry was flagged as volatile by whoever created it
    _is_volatile = attr.ib(default=False)
    # size supplied at construction time, or None when not supplied
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """Return a one-element list holding the StoreFile for this entry."""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
491
491
492
492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # class-level marker distinguishing this from RevlogStoreEntry
    is_revlog = False
498
498
499
499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    # class-level marker distinguishing this from SimpleStoreEntry
    is_revlog = True
    # the three attributes below are stored as passed to __init__
    revlog_type = attr.ib(default=None)
    target_id = attr.ib(default=None)
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self.unencoded_path
530
530
531
531
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # path of the file, before store encoding
    unencoded_path = attr.ib()
    # known size, or None when it has to be looked up on disk.
    # The default must be None (not False): file_size() tests `is not None`,
    # so a False default would be returned as the size instead of stat-ing.
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return the size of the file.

        The size given at construction time is returned when there is one;
        otherwise ``vfs.stat`` is used, and a missing file counts as 0.
        """
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
547
547
548
548
def _gather_revlog(files_data):
    """group files per revlog prefix

    ``files_data`` is an iterable of ``(filename, value)`` pairs.

    Returns a sorted list of ``(prefix, details)`` pairs. ``prefix`` is the
    revlog prefix without extension; ``details`` is a dict mapping every
    file "suffix" seen for that revlog to its arbitrary file data value.
    """
    revlogs = collections.defaultdict(dict)
    for u, value in files_data:
        name, ext = _split_revlog_ext(u)
        revlogs[name][ext] = value
    return sorted(revlogs.items())
561
561
562
562
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension

    Returns ``(prefix, suffix)``. The split happens at the last ``-`` for
    names ending in one of REVLOG_FILES_LONG_EXT and at the last ``.``
    otherwise; the separator is kept in the suffix.
    """
    if filename.endswith(REVLOG_FILES_LONG_EXT):
        char = b'-'
    else:
        char = b'.'
    idx = filename.rfind(char)
    return filename[:idx], filename[idx:]
571
571
572
572
def _ext_key(ext):
    """a key to order revlog suffix

    important to issue .i after other entry.

    Returns a ``(rank, ext)`` tuple suitable for use as a sort key."""
    # the only important part of this order is to keep the `.i` last.
    if ext.endswith(b'.n'):
        return (0, ext)
    elif ext.endswith(b'.nd'):
        return (10, ext)
    elif ext.endswith(b'.d'):
        return (20, ext)
    elif ext.endswith(b'.i'):
        return (50, ext)
    else:
        return (40, ext)
588
588
589
589
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # unfiltered access to the store directory
        self.rawvfs = vfs
        # access to the store with directory names encoded on the fly
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file `f`'''
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''list the revlog related files found under `relpath`

        Returns a sorted list of `(unencoded, (revlog_type, size))` tuples.
        Sub-directories are only visited when `recurse` is true.

        `undecodable` is not used by this implementation; it is part of the
        signature so that subclasses that decode file names can report the
        names they fail to decode.
        '''
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the store prefix to drop from each full path
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        '''return the changelog object backed by this store'''
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        '''return the manifestlog object backed by this store'''
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        Note: this implementation does not filter on [matcher]; the
        argument is accepted for signature compatibility with subclasses."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                # drop the initial directory
                revlog_target_id = revlog.split(b'/', 1)[1]
                if base_dir == b'meta':
                    # also drop the `00manifest` file part so the target
                    # is the tracked directory, the same value
                    # `fncachestore.datafiles` reports
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """yield the entries for the files at the top level of the store

        Files that belong to neither the changelog nor the root manifest are
        yielded first, then the root manifest files, then the changelog
        files."""
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        # (keeping ordering so we get 00changelog.i last)
        key = lambda x: _ext_key(x[0])
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields store entry objects: first those from `datafiles`, then
        those from `topfiles`.

        `matcher` is forwarded to `datafiles`; it is meant to restrict the
        result to the storage files of the tracked paths it matches.
        """
        # yield data files first
        yield from self.datafiles(matcher)
        yield from self.topfiles()

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
746
746
747
747
class encodedstore(basicstore):
    '''store whose file names are encoded with `encodefilename`'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        '''same as `basicstore._walk`, with the file names decoded

        A name that cannot be decoded raises `error.StorageError` when
        `undecodable` is None; otherwise the name is appended to
        `undecodable` and left out of the result.
        '''
        decoded_entries = []
        for encoded, value in super()._walk(relpath, recurse):
            try:
                decoded = decodefilename(encoded)
            except KeyError:
                if undecodable is not None:
                    undecodable.append(encoded)
                    continue
                msg = _(b'undecodable revlog name %s') % encoded
                raise error.StorageError(msg)
            decoded_entries.append((decoded, value))
        return decoded_entries

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        '''yield the data entries whose path is matched by `matcher`'''
        for candidate in super().datafiles(undecodable=undecodable):
            if not _matchtrackedpath(candidate.unencoded_path, matcher):
                continue
            yield candidate

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        store_files = [b'store/' + f for f in _data]
        return [b'requires', b'00changelog.i'] + store_files
787
787
788
788
class fncache:
    """in-memory view of the `fncache` file

    `entries` holds the names read from disk (None until loaded) and
    `addls` the names added since the last write.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        # make sure the file gets closed even when a corrupted fncache makes
        # us abort below
        try:
            self.entries = set()
            chunk = b''
            for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
                chunk += c
                try:
                    p = chunk.rindex(b'\n')
                    self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                    chunk = chunk[p + 1 :]
                except ValueError:
                    # substring '\n' not found, maybe the entry is bigger than
                    # the chunksize, so let's keep iterating
                    pass

            if chunk:
                msg = _(b"fncache does not ends with a newline")
                if warn:
                    warn(msg + b'\n')
                else:
                    raise error.Abort(
                        msg,
                        hint=_(
                            b"use 'hg debugrebuildfncache' to "
                            b"rebuild the fncache"
                        ),
                    )
            self._checkentries(fp, warn)
        finally:
            fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-read the file to report the line number of each empty entry
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """write the pending changes to the fncache file

        When entries were removed the whole file is rewritten, otherwise the
        new entries are appended to it."""
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the loaded state; the next access reads the file again
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """make later `add(fn)` calls a no-op"""
        self._ignores.add(fn)

    def add(self, fn):
        """record `fn` as a new entry unless it is ignored or already known"""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """forget `fn`; removing a name that is not tracked is not an error"""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk, no need to rewrite the file
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
913
913
914
914
class _fncachevfs(vfsmod.proxyvfs):
    '''vfs proxy that encodes paths and records revlog files in the fncache'''

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        is_write = mode not in (b'r', b'rb')
        if (
            is_write
            and path.startswith((b'data/', b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skip_add = self.fncache.entries is None and self.vfs.exists(encoded)
            if (
                skip_add
                and b'r+' in mode
                and not self.vfs.stat(encoded).st_size
            ):
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                skip_add = False
            if not skip_add:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # an empty path is joined as is, it has nothing to encode
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
950
950
951
951
class fncachestore(basicstore):
    '''store that lists its data files in a `fncache` file'''

    def __init__(self, path, vfstype, dotencode):
        self.encode = _pathencode if dotencode else _plainhybridencode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.fncache = fncache(vfs)
        self.vfs = _fncachevfs(vfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the (already encoded) store file `path`'''
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        '''yield an entry for each revlog file listed in the fncache

        Only the entries whose path is matched by `matcher` are yielded.
        `undecodable` is not used by this implementation.
        '''
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        typed = ((name, revlog_type(name)) for name in self.fncache)
        known = (item for item in typed if item[1] is not None)
        for revlog, details in _gather_revlog(known):
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                without_root = revlog.split(b'/', 1)[1]
                revlog_target_id = without_root.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, flags in sorted(details.items()):
                unencoded = revlog + ext
                if _matchtrackedpath(unencoded, matcher):
                    yield RevlogStoreEntry(
                        unencoded_path=unencoded,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(flags & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(flags & FILEFLAGS_VOLATILE),
                    )

    def copylist(self):
        in_store = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        result = [b'requires', b'00changelog.i']
        result.extend(b'store/' + name for name in in_store)
        return result

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        # forget the loaded state; the fncache reloads itself when next used
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        '''tell whether the unencoded store file `f` exists on disk'''
        ef = self.encode(f)
        try:
            self.getsize(ef)
        except FileNotFoundError:
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        index = path + b'.i'
        if index in self.fncache and self._exists(index):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        return any(
            e.startswith(path) and self._exists(e) for e in self.fncache
        )
General Comments 0
You need to be logged in to leave comments. Login now