##// END OF EJS Templates
store: add a `target_id` attribute on RevlogStoreEntry...
marmoute -
r51376:60e613f6 default
parent child Browse files
Show More
@@ -1,1043 +1,1054
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how much bytes should be read from fncache in one read
31 # how much bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _matchtrackedpath(path, matcher):
36 def _matchtrackedpath(path, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 path = decodedir(path)
44 path = decodedir(path)
45 if path.startswith(b'data/'):
45 if path.startswith(b'data/'):
46 return matcher(path[len(b'data/') : -len(b'.i')])
46 return matcher(path[len(b'data/') : -len(b'.i')])
47 elif path.startswith(b'meta/'):
47 elif path.startswith(b'meta/'):
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49
49
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
def decodedir(path):
    """Reverse the transformation done by ``_encodedir``.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was dir-encoded in this path
    if b".hg/" not in path:
        return path
    decoded = path.replace(b".d.hg/", b".d/")
    decoded = decoded.replace(b".i.hg/", b".i/")
    return decoded.replace(b".hg.hg/", b".hg/")
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
def _buildencodefun():
    """Return an (encode, decode) pair of filename transcoders.

    The encoder escapes reserved bytes as ``~xx`` and uppercase
    letters (plus ``_`` itself) as ``_`` followed by the lowercase
    letter; the decoder reverses that mapping.

    >>> enc, dec = _buildencodefun()
    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'
    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'
    """
    escape = b'_'
    tobyte = pycompat.bytechr
    # identity for plain ascii, then override the special cases
    encmap = {tobyte(i): tobyte(i) for i in range(127)}
    for code in _reserved():
        encmap[tobyte(code)] = b"~%02x" % code
    for code in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        encmap[tobyte(code)] = escape + tobyte(code).lower()

    decmap = {v: k for k, v in encmap.items()}

    def decode(s):
        # greedily match 1-, 2- or 3-byte escape sequences
        pos = 0
        while pos < len(s):
            for width in (1, 2, 3):
                try:
                    yield decmap[s[pos : pos + width]]
                    pos += width
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    def encode(s):
        return b''.join([encmap[s[i : i + 1]] for i in range(len(s))])

    return (encode, lambda s: b''.join(decode(s)))
168
168
169
169
170 _encodefname, _decodefname = _buildencodefun()
170 _encodefname, _decodefname = _buildencodefun()
171
171
172
172
def encodefilename(s):
    """Apply both the directory and the filename store encodings.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
179
179
180
180
def decodefilename(s):
    """Reverse ``encodefilename``.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    name = _decodefname(s)
    return decodedir(name)
187
187
188
188
def _buildlowerencodefun():
    """Return a non-reversible encoder lowercasing and escaping names.

    Reserved bytes become ``~xx``; uppercase ASCII is simply lowercased.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    tobyte = pycompat.bytechr
    table = {tobyte(i): tobyte(i) for i in range(127)}
    for code in _reserved():
        table[tobyte(code)] = b"~%02x" % code
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[tobyte(code)] = tobyte(code).lower()

    def lowerencode(s):
        return b"".join([table[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212
212
213
213
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215
215
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219
219
220
220
221 def _auxencode(path, dotencode):
221 def _auxencode(path, dotencode):
222 """
222 """
223 Encodes filenames containing names reserved by Windows or which end in
223 Encodes filenames containing names reserved by Windows or which end in
224 period or space. Does not touch other single reserved characters c.
224 period or space. Does not touch other single reserved characters c.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 Additionally encodes space or period at the beginning, if dotencode is
226 Additionally encodes space or period at the beginning, if dotencode is
227 True. Parameter path is assumed to be all lowercase.
227 True. Parameter path is assumed to be all lowercase.
228 A segment only needs encoding if a reserved name appears as a
228 A segment only needs encoding if a reserved name appears as a
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 doesn't need encoding.
230 doesn't need encoding.
231
231
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 >>> _auxencode(s.split(b'/'), True)
233 >>> _auxencode(s.split(b'/'), True)
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 >>> _auxencode(s.split(b'/'), False)
236 >>> _auxencode(s.split(b'/'), False)
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 >>> _auxencode([b'foo. '], True)
238 >>> _auxencode([b'foo. '], True)
239 ['foo.~20']
239 ['foo.~20']
240 >>> _auxencode([b' .foo'], True)
240 >>> _auxencode([b' .foo'], True)
241 ['~20.foo']
241 ['~20.foo']
242 """
242 """
243 for i, n in enumerate(path):
243 for i, n in enumerate(path):
244 if not n:
244 if not n:
245 continue
245 continue
246 if dotencode and n[0] in b'. ':
246 if dotencode and n[0] in b'. ':
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 path[i] = n
248 path[i] = n
249 else:
249 else:
250 l = n.find(b'.')
250 l = n.find(b'.')
251 if l == -1:
251 if l == -1:
252 l = len(n)
252 l = len(n)
253 if (l == 3 and n[:3] in _winres3) or (
253 if (l == 3 and n[:3] in _winres3) or (
254 l == 4
254 l == 4
255 and n[3:4] <= b'9'
255 and n[3:4] <= b'9'
256 and n[3:4] >= b'1'
256 and n[3:4] >= b'1'
257 and n[:3] in _winres4
257 and n[:3] in _winres4
258 ):
258 ):
259 # encode third letter ('aux' -> 'au~78')
259 # encode third letter ('aux' -> 'au~78')
260 ec = b"~%02x" % ord(n[2:3])
260 ec = b"~%02x" % ord(n[2:3])
261 n = n[0:2] + ec + n[3:]
261 n = n[0:2] + ec + n[3:]
262 path[i] = n
262 path[i] = n
263 if n[-1] in b'. ':
263 if n[-1] in b'. ':
264 # encode last period or space ('foo...' -> 'foo..~2e')
264 # encode last period or space ('foo...' -> 'foo..~2e')
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 return path
266 return path
267
267
268
268
269 _maxstorepathlen = 120
269 _maxstorepathlen = 120
270 _dirprefixlen = 8
270 _dirprefixlen = 8
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272
272
273
273
def _hashencode(path, dotencode):
    """Build the hashed (non-reversible) store path for ``path``.

    The result is ``dh/`` + truncated directory prefixes + as much of
    the encoded basename as fits + the sha1 digest of the full path +
    the original extension, all kept within ``_maxstorepathlen``.
    """
    digest = hex(hashutil.sha1(path).digest())
    # drop the 'data/' or 'meta/' prefix before lowering/escaping
    segments = lowerencode(path[5:]).split(b'/')
    segments = _auxencode(segments, dotencode)
    basename = segments[-1]
    _root, ext = os.path.splitext(basename)
    shortdirs = []
    used = 0
    for segment in segments[:-1]:
        prefix = segment[:_dirprefixlen]
        if prefix[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + b'_'
        # +1 accounts for the joining '/' between components
        total = len(prefix) if not used else used + 1 + len(prefix)
        if total > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        used = total
    dirs = b'/'.join(shortdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with the start of the basename while room remains
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
304
304
305
305
def _hybridencode(path, dotencode):
    """Encode ``path`` while keeping the result under a length limit.

    Paths beginning with 'data/' are first encoded reversibly:
    uppercase letters become ``_x``, reserved/illegal characters
    become ``~xx`` (see encodefilename), and Windows reserved names
    are masked (see _auxencode).

    If that result exceeds ``_maxstorepathlen``, a non-reversible
    hashed form is produced instead (see _hashencode): truncated
    directory prefixes (up to ``_dirprefixlen`` chars each, total
    within ``_maxshortdirslen``), a filler taken from the basename,
    the sha digest of the full path, and the preserved extension —
    with the leading 'data/' replaced by 'dh/'.
    """
    path = encodedir(path)
    components = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(components, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
343
343
344
344
def _pathencode(path):
    """Encode ``path`` with dot-encoding enabled and a length limit.

    Like ``_hybridencode(path, True)`` but with an additional early
    length check on the raw path.
    """
    dir_encoded = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    components = _encodefname(dir_encoded).split(b'/')
    res = b'/'.join(_auxencode(components, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return res
354
354
355
355
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
def _plainhybridencode(f):
    """``_hybridencode`` with dot-encoding disabled."""
    return _hybridencode(f, False)
361
361
362
362
363 def _calcmode(vfs):
363 def _calcmode(vfs):
364 try:
364 try:
365 # files in .hg/ will be created using this mode
365 # files in .hg/ will be created using this mode
366 mode = vfs.stat().st_mode
366 mode = vfs.stat().st_mode
367 # avoid some useless chmods
367 # avoid some useless chmods
368 if (0o777 & ~util.umask) == (0o777 & mode):
368 if (0o777 & ~util.umask) == (0o777 & mode):
369 mode = None
369 mode = None
370 except OSError:
370 except OSError:
371 mode = None
371 mode = None
372 return mode
372 return mode
373
373
374
374
375 _data = [
375 _data = [
376 b'bookmarks',
376 b'bookmarks',
377 b'narrowspec',
377 b'narrowspec',
378 b'data',
378 b'data',
379 b'meta',
379 b'meta',
380 b'00manifest.d',
380 b'00manifest.d',
381 b'00manifest.i',
381 b'00manifest.i',
382 b'00changelog.d',
382 b'00changelog.d',
383 b'00changelog.i',
383 b'00changelog.i',
384 b'phaseroots',
384 b'phaseroots',
385 b'obsstore',
385 b'obsstore',
386 b'requires',
386 b'requires',
387 ]
387 ]
388
388
# extension of a revlog's "main" file (its entry point)
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the auxiliary revlog files
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extensions that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" files are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exceptions to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415
415
416
416
def is_revlog(f, kind, st):
    """Return the revlog type flags for regular file ``f``, else None."""
    return revlog_type(f) if kind == stat.S_IFREG else None
421
421
422
422
def revlog_type(f):
    """Classify filename ``f`` as a revlog main/other file, or None.

    XXX `undo.*` files created by the transaction should ideally be
    filtered out here, but being naive about it also filters real
    revlogs for `undo.*` tracked files, leading to issue6542. So
    EXCLUDED is no longer applied.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
435
435
436
436
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# the file is not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# composed type constants: data category | revlog role
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
461
461
462
462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represents some data in the
    store. It carries the unencoded path plus enough metadata to build
    the matching StoreFile objects.
    """

    unencoded_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """Return the StoreFile objects backing this entry."""
        entry_file = StoreFile(
            unencoded_path=self.unencoded_path,
            file_size=self._file_size,
            is_volatile=self._is_volatile,
        )
        return [entry_file]
491
491
492
492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic (non-revlog) entry in the store"""

    is_revlog = False
498
498
499
499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* revlog type values
    revlog_type = attr.ib(default=None)
    # identifier of the revlog this entry targets — presumably the
    # tracked path/name the revlog belongs to; confirm against callers
    target_id = attr.ib(default=None)
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main
523
526
524
527
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=False)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return this file's size, stat-ing through ``vfs`` if unknown.

        A file that no longer exists (e.g. a volatile one that was
        deleted in the meantime) counts as empty.
        """
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
540
543
541
544
def _gather_revlog(files_data):
    """Group files per revlog prefix.

    Given an iterable of (filename, data) pairs, returns a sorted list
    of (prefix, mapping) pairs — one per revlog — where the mapping
    goes from each file "suffix" seen for that revlog to its data.
    """
    revlogs = collections.defaultdict(dict)
    for filename, data in files_data:
        prefix, ext = _split_revlog_ext(filename)
        revlogs[prefix][ext] = data
    return sorted(revlogs.items())
554
557
555
558
def _split_revlog_ext(filename):
    """Split a revlog filename into (prefix, variable-extension).

    Extensions that use the `-SOMELONGIDHASH.ext` form are split at
    the last '-', everything else at the last '.'.
    """
    separator = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(separator)
    return filename[:idx], filename[idx:]
564
567
565
568
566 def _ext_key(ext):
569 def _ext_key(ext):
567 """a key to order revlog suffix
570 """a key to order revlog suffix
568
571
569 important to issue .i after other entry."""
572 important to issue .i after other entry."""
570 # the only important part of this order is to keep the `.i` last.
573 # the only important part of this order is to keep the `.i` last.
571 if ext.endswith(b'.n'):
574 if ext.endswith(b'.n'):
572 return (0, ext)
575 return (0, ext)
573 elif ext.endswith(b'.nd'):
576 elif ext.endswith(b'.nd'):
574 return (10, ext)
577 return (10, ext)
575 elif ext.endswith(b'.d'):
578 elif ext.endswith(b'.d'):
576 return (20, ext)
579 return (20, ext)
577 elif ext.endswith(b'.i'):
580 elif ext.endswith(b'.i'):
578 return (50, ext)
581 return (50, ext)
579 else:
582 else:
580 return (40, ext)
583 return (40, ext)
581
584
582
585
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # raw filesystem access, without any name encoding
        self.rawvfs = vfs
        # access through the directory-name encoding (encodedir/decodedir)
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the absolute, dir-encoded path of `f` inside the store"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse):
        """walk the on-disk store below `relpath` looking for revlog files

        Returns a sorted list of ``(unencoded_name, (revlog_type, size))``
        tuples, one per file that `is_revlog` recognises. Sub-directories
        are only visited when `recurse` is True.
        """
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """instantiate the changelog for this store"""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """instantiate the manifest log for this store"""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True)
            # drop files whose revlog type could not be determined
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    # target_id: the revlog prefix with the leading
                    # `data/` or `meta/` directory stripped.
                    # NOTE(review): for `meta/` this keeps the trailing
                    # `00manifest` file prefix, whereas
                    # fncachestore.datafiles strips it down to the
                    # directory (with trailing `/`) — confirm which form
                    # is intended to be canonical.
                    revlog_target_id = revlog.split(b'/', 1)[1]
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """yield store entries for files living at the root of the store

        Non-revlog files come out as SimpleStoreEntry; the root manifest
        and the changelog come out last, as RevlogStoreEntry, manifest
        before changelog.
        """
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                # (keeping ordering so we get 00changelog.i last)
                key = lambda x: _ext_key(x[0])
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        # top-level revlogs (changelog, root manifest)
                        # have no per-file target, hence the empty id
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields store entries (see BaseStoreEntry)

        if a matcher is passed, only storage files of the tracked paths
        that match the matcher are yielded
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        """list of relative paths to copy when cloning this store"""
        return _data

    def write(self, tr):
        # nothing to flush for a plain store
        pass

    def invalidatecaches(self):
        # no caches in the base implementation
        pass

    def markremoved(self, fn):
        # the base store keeps no file list to update
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
736
742
737
743
class encodedstore(basicstore):
    """a store whose file names are stored filename-encoded on disk"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # every access goes through the full filename encoding
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """same as basicstore.datafiles, but decoding on-disk names first

        Entries whose names cannot be decoded either raise StorageError or,
        when `undecodable` is a list, are appended to it and skipped.
        """
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            # patch the entry in place so it carries the decoded name
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        """return the absolute, filename-encoded path of `f`"""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """list of relative paths to copy when cloning this store"""
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
777
783
778
784
class fncache:
    """in-memory view of the `fncache` file listing all store file names

    Loaded lazily; additions are buffered in `addls` and only merged into
    `entries` (and written out) by `write`.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names that must never be recorded in the fncache
        self._ignores = set()
        # known entries; None until lazily loaded from disk by _load()
        self.entries = None
        # True when `entries` diverged from the on-disk file (removal)
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks to avoid loading a huge fncache at once
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        # leftover bytes mean the file did not end with a newline
        if chunk:
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """flush pending changes to disk within transaction `tr`

        A full rewrite happens when entries were removed (`_dirty`);
        otherwise new names are simply appended.
        """
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access so entries reflect the append
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """mark `fn` as never to be added to the fncache"""
        self._ignores.add(fn)

    def add(self, fn):
        """record `fn` as a pending addition (no-op if ignored or known)"""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """forget `fn`, whether it was pending or already on disk"""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
903
909
904
910
class _fncachevfs(vfsmod.proxyvfs):
    """a vfs proxy that records, in the fncache, every revlog file it
    opens for writing under `data/` or `meta/`"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        # callable mapping an unencoded store path to its on-disk name
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """return the absolute on-disk path for the unencoded `path`"""
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
940
946
941
947
class fncachestore(basicstore):
    """a store that tracks its file names in an `fncache` file and keeps
    them hash-encoded on disk"""

    def __init__(self, path, vfstype, dotencode):
        # choose the on-disk name encoding scheme
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # all writes go through the fncache-recording proxy
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """return the absolute, encoded path of `f` inside the store"""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """size in bytes of the (already encoded) `path`"""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield a RevlogStoreEntry per revlog file listed in the fncache

        NOTE(review): unlike basicstore.datafiles, entries yielded here
        carry no `file_size` — presumably callers stat lazily; confirm.
        """
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                if not _matchtrackedpath(f, matcher):
                    continue
                yield RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )

    def copylist(self):
        """list of relative paths to copy when cloning this store"""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        """flush pending fncache changes within transaction `tr`"""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """drop the in-memory fncache state; it will reload on next use"""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """forget `fn` from the fncache"""
        self.fncache.remove(fn)

    def _exists(self, f):
        """True if the (unencoded) file `f` exists on disk"""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now