##// END OF EJS Templates
store: actually tag tree manifest revlogs as manifest revlogs...
marmoute -
r51375:1fc25227 default
parent child Browse files
Show More
@@ -1,1031 +1,1043 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
# optional C implementations of the encoding helpers defined below
parsers = policy.importmod('parsers')

# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34
34
35
35
def _matchtrackedpath(path, matcher):
    """Tell whether a fncache entry refers to a path tracked by ``matcher``.

    The entry is decoded with ``decodedir`` first.  A ``None`` matcher
    matches everything.  Entries must live under ``data/`` (filelogs) or
    ``meta/`` (tree manifests); anything else is a programming error.
    """
    if matcher is None:
        return True
    decoded = decodedir(path)
    data_prefix = b'data/'
    meta_prefix = b'meta/'
    if decoded.startswith(data_prefix):
        # strip 'data/' and the trailing '.i' to recover the tracked file
        return matcher(decoded[len(data_prefix) : -len(b'.i')])
    if decoded.startswith(meta_prefix):
        # strip 'meta/' and '/00manifest.i' to recover the tracked directory
        return matcher.visitdir(
            decoded[len(meta_prefix) : -len(b'/00manifest.i')]
        )
    raise error.ProgrammingError(b"cannot decode path %s" % decoded)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
# prefer the C implementation of the directory mangling when available
encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
def decodedir(path):
    """Undo the directory-name mangling performed by ``_encodedir``.

    ``decodedir(b'data/foo.i.hg/bla.i')`` gives ``b'data/foo.i/bla.i'``
    and ``decodedir(b'data/foo.i.hg.hg/bla.i')`` gives
    ``b'data/foo.i.hg/bla.i'``.
    """
    # fast path: unmangled paths contain no '.hg/' at all
    if b".hg/" not in path:
        return path
    decoded = path
    # order matters: '.hg.hg/' must be restored last
    for mangled, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        decoded = decoded.replace(mangled, plain)
    return decoded
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
def _buildencodefun():
    """Build the ``(encode, decode)`` pair for the reversible name encoding.

    The mapping is: uppercase ASCII ``X`` becomes ``_x`` (and ``_`` itself
    becomes ``__``); every byte yielded by ``_reserved()`` becomes a
    ``~xx`` two-digit hex escape; all other bytes map to themselves.  So
    ``b'HELLO'`` encodes to ``b'_h_e_l_l_o'`` and ``b'hello:world?'`` to
    ``b'hello~3aworld~3f'``.  ``decode`` is the exact inverse and raises
    ``KeyError`` on input that is not a valid encoding.
    """
    escape = b'_'
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(x)] = escape + xchr(x).lower()

    # invert the encoding map for decoding
    dmap = {v: k for k, v in cmap.items()}

    def decode(s):
        i = 0
        while i < len(s):
            # encoded atoms are 1 ('a'), 2 ('_a') or 3 ('~2e') bytes wide
            for width in range(1, 4):
                try:
                    yield dmap[s[i : i + width]]
                    i += width
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    def encode(s):
        return b''.join([cmap[s[c : c + 1]] for c in range(len(s))])

    return (encode, lambda s: b''.join(list(decode(s))))
168
168
169
169
# module-level reversible encoder/decoder pair used below
_encodefname, _decodefname = _buildencodefun()
171
171
172
172
def encodefilename(s):
    """Reversibly encode a store filename.

    Applies the directory mangling (``encodedir``) followed by the
    per-character escaping (``_encodefname``); e.g.
    ``b'foo.i/bar.d/bla.hg/hi:world?/HELLO'`` becomes
    ``b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'``.
    """
    return _encodefname(encodedir(s))
179
179
180
180
def decodefilename(s):
    """Decode a store filename produced by ``encodefilename``.

    Undoes the per-character escaping first, then the directory mangling.
    """
    return decodedir(_decodefname(s))
187
187
188
188
def _buildlowerencodefun():
    """Build the non-reversible "lower" encoder.

    Reserved bytes become ``~xx`` hex escapes, ASCII uppercase is folded
    to lowercase, everything else passes through unchanged; e.g.
    ``b'HELLO'`` -> ``b'hello'`` and ``b'hello:world?'`` ->
    ``b'hello~3aworld~3f'``.
    """
    xchr = pycompat.bytechr
    cmap = {}
    for x in range(127):
        cmap[xchr(x)] = xchr(x)
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    return lambda s: b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
212
212
213
213
# use the C lowerencode from parsers when present, else the Python fallback
lowerencode = getattr(parsers, 'lowerencode', None)
if lowerencode is None:
    lowerencode = _buildlowerencodefun()
215
215
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219
219
220
220
def _auxencode(path, dotencode):
    """Escape Windows-reserved basenames and trailing period/space.

    ``path`` is a list of all-lowercase path segments; it is modified in
    place and also returned.  Single reserved characters (``\\:*?"<>|`` or
    bytes <= 31) are *not* handled here.  When ``dotencode`` is true, a
    leading period or space is escaped as well.  A segment only needs the
    reserved-name treatment when the reserved word is its basename
    (``aux``, ``aux.foo``); something like ``foo.aux`` is left alone.
    """
    for idx, seg in enumerate(path):
        if not seg:
            continue
        if dotencode and seg[0] in b'. ':
            # escape a leading period or space ('.foo' -> '~2efoo')
            seg = b"~%02x" % ord(seg[0:1]) + seg[1:]
            path[idx] = seg
        else:
            dot = seg.find(b'.')
            if dot == -1:
                dot = len(seg)
            is_res3 = dot == 3 and seg[:3] in _winres3
            is_res4 = (
                dot == 4
                and b'1' <= seg[3:4] <= b'9'
                and seg[:3] in _winres4
            )
            if is_res3 or is_res4:
                # encode third letter ('aux' -> 'au~78')
                seg = seg[0:2] + b"~%02x" % ord(seg[2:3]) + seg[3:]
                path[idx] = seg
        if seg[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[idx] = seg[:-1] + b"~%02x" % ord(seg[-1:])
    return path
267
267
268
268
269 _maxstorepathlen = 120
269 _maxstorepathlen = 120
270 _dirprefixlen = 8
270 _dirprefixlen = 8
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272
272
273
273
def _hashencode(path, dotencode):
    """Non-reversible hashed encoding for over-long store paths.

    The result is ``dh/`` + up to ``_maxshortdirslen`` characters of
    shortened directory prefixes + as much of the basename as fits within
    ``_maxstorepathlen`` + the sha1 digest of the full path + the original
    extension (e.g. '.i' or '.d').
    """
    digest = hex(hashutil.sha1(path).digest())
    # skip the 'data/' or 'meta/' prefix before lower-encoding
    parts = _auxencode(lowerencode(path[5:]).split(b'/'), dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        # account for the '/' separator between kept directory prefixes
        t = len(d) if sdirslen == 0 else sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with as much of the basename as still fits
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
304
304
305
305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible): uppercase letters 'X' become '_x';
    reserved or illegal characters become '~xx' two-digit hex escapes
    (see encodefilename); path components that are Windows reserved
    filenames get their third character encoded ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible): used when the default-encoded path
    is longer than _maxstorepathlen.  It keeps up to _dirprefixlen
    characters of every directory level (but no more levels than fit into
    _maxshortdirslen), then as much of the basename as still fits, then
    the sha digest of the full path, preserving the extension ('.i'/'.d').
    The leading 'data/' is replaced with 'dh/' in that case.
    """
    encoded = encodedir(path)
    segments = _encodefname(encoded).split(b'/')
    res = b'/'.join(_auxencode(segments, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(encoded, dotencode)
    return res
343
343
344
344
def _pathencode(path):
    """Dotencode variant of ``_hybridencode`` with an early length check.

    Pure-Python fallback; replaced by the C implementation just below
    when the parsers module provides one.
    """
    direncoded = encodedir(path)
    # a raw path already over the limit can go straight to hashing
    if len(path) > _maxstorepathlen:
        return _hashencode(direncoded, True)
    segments = _encodefname(direncoded).split(b'/')
    res = b'/'.join(_auxencode(segments, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(direncoded, True)
    return res


_pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
def _plainhybridencode(f):
    """``_hybridencode`` with dot-encoding of leading '.'/' ' disabled."""
    return _hybridencode(f, False)
361
361
362
362
def _calcmode(vfs):
    """Return the creation mode for files under ``vfs``, or None.

    None means no explicit chmod is needed: either the directory mode
    already matches what the umask would produce, or the directory could
    not be stat'ed.
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
373
373
374
374
375 _data = [
375 _data = [
376 b'bookmarks',
376 b'bookmarks',
377 b'narrowspec',
377 b'narrowspec',
378 b'data',
378 b'data',
379 b'meta',
379 b'meta',
380 b'00manifest.d',
380 b'00manifest.d',
381 b'00manifest.i',
381 b'00manifest.i',
382 b'00changelog.d',
382 b'00changelog.d',
383 b'00changelog.i',
383 b'00changelog.i',
384 b'phaseroots',
384 b'phaseroots',
385 b'obsstore',
385 b'obsstore',
386 b'requires',
386 b'requires',
387 ]
387 ]
388
388
# extension of the main (entry point) file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the companion files of a revlog
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415
415
416
416
def is_revlog(f, kind, st):
    """Return the revlog type flags for directory entry ``f``, or None.

    Only regular files can be revlogs; ``kind`` is the stat file kind
    reported by the directory walk.
    """
    return revlog_type(f) if kind == stat.S_IFREG else None
421
421
422
422
def revlog_type(f):
    """Classify filename ``f`` as a revlog main/companion file, or None.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
435
435
436
436
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# combined type constants: revlog category | main-vs-companion role
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
461
461
462
462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # store-relative, unencoded path of the entry
    unencoded_path = attr.ib()
    # whether the file may change/disappear between listing and streaming
    _is_volatile = attr.ib(default=False)
    # known size in bytes, or None when it must be stat'ed on demand
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of StoreFile describing this entry on disk"""
        only_file = StoreFile(
            unencoded_path=self.unencoded_path,
            file_size=self._file_size,
            is_volatile=self._is_volatile,
        )
        return [only_file]
491
491
492
492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic (non-revlog) entry in the store"""

    is_revlog = False
498
498
499
499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # category flags for this revlog — presumably FILEFLAGS_* values
    # (changelog/manifestlog/filelog); confirm against callers
    revlog_type = attr.ib(default=None)
    # True for the revlog's main file, False for companion files
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
523
523
524
524
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # NOTE(review): default is False (not None), so file_size() returns
    # False for a StoreFile built without an explicit size — confirm all
    # callers either pass file_size or expect this.
    _file_size = attr.ib(default=False)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return this file's size, stat'ing through ``vfs`` when unknown

        A file that vanished is reported with size 0.
        """
        known = self._file_size
        if known is not None:
            return known
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
540
540
541
541
def _gather_revlog(files_data):
    """group files per revlog prefix

    Returns a sorted list of ``(prefix, {extension: data})`` pairs: the
    prefix is the revlog name without its variable extension, and the
    inner dict maps every extension seen for that revlog to the arbitrary
    per-file data supplied by the caller.
    """
    revlogs = collections.defaultdict(dict)
    for unencoded, data in files_data:
        prefix, ext = _split_revlog_ext(unencoded)
        revlogs[prefix][ext] = data
    return sorted(revlogs.items())
554
554
555
555
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension

    Extensions that also come in `-SOMELONGIDHASH.ext` form are split at
    the last '-', the others at the last '.'.
    """
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
564
564
565
565
566 def _ext_key(ext):
566 def _ext_key(ext):
567 """a key to order revlog suffix
567 """a key to order revlog suffix
568
568
569 important to issue .i after other entry."""
569 important to issue .i after other entry."""
570 # the only important part of this order is to keep the `.i` last.
570 # the only important part of this order is to keep the `.i` last.
571 if ext.endswith(b'.n'):
571 if ext.endswith(b'.n'):
572 return (0, ext)
572 return (0, ext)
573 elif ext.endswith(b'.nd'):
573 elif ext.endswith(b'.nd'):
574 return (10, ext)
574 return (10, ext)
575 elif ext.endswith(b'.d'):
575 elif ext.endswith(b'.d'):
576 return (20, ext)
576 return (20, ext)
577 elif ext.endswith(b'.i'):
577 elif ext.endswith(b'.i'):
578 return (50, ext)
578 return (50, ext)
579 else:
579 else:
580 return (40, ext)
580 return (40, ext)
581
581
582
582
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # raw, unencoded access to the store directory
        self.rawvfs = vfs
        # same directory, with directory-name encoding applied on access
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        # full path of `f` inside the store, with directory-name encoding
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded_name, (revlog_type, size))
        for the revlog files found under `relpath`'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        # path relative to the store root, normalized to `/`
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        # `data/` holds filelogs, `meta/` holds (tree) manifest revlogs;
        # tag each entry with the type matching its location.
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True)
            # drop entries that were not recognized as revlog files
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        # non-recursive walk of the store root
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        # only the root 00manifest/00changelog revlogs should be found here
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                # (keeping ordering so we get 00changelog.i last)
                key = lambda x: _ext_key(x[0])
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        # no-op here; subclasses with extra on-disk state override this
        pass

    def invalidatecaches(self):
        # no-op here; fncachestore overrides to drop its fncache state
        pass

    def markremoved(self, fn):
        # no-op here; fncachestore overrides to drop fn from the fncache
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
731
736
732
737
class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # unlike basicstore, full filenames (not just directory names) are
        # encoded on the way to disk
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # same as basicstore.datafiles, with filename decoding applied to
        # each yielded entry
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
777
773
778
class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names for which add() becomes a no-op
        self._ignores = set()
        # set of entries loaded from disk; None until _load() has run
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks to avoid loading a huge fncache in one go
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes after the last newline mean a truncated file
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            # removals happened: rewrite the whole file
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access so entries reflects the append
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk yet, just forget it
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
898
903
899
904
class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        # transparently register revlog files opened for writing in the
        # fncache, then delegate the open to the underlying vfs with the
        # encoded path
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
935
940
936
941
class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # vfs wrapper that records new revlog files in the fncache
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # entries come from the fncache instead of a directory walk
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            # tag the revlog from its location: `data/` entries are
            # filelogs, `meta/` entries are (tree) manifest revlogs
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                if not _matchtrackedpath(f, matcher):
                    continue
                yield RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=rl_type,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        # existence check through stat; False only on FileNotFoundError
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now