##// END OF EJS Templates
store: cache the `files()` return for store entries...
marmoute -
r51524:b59e0a4f default
parent child Browse files
Show More
@@ -1,1117 +1,1124 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 filelog,
22 filelog,
23 manifest,
23 manifest,
24 policy,
24 policy,
25 pycompat,
25 pycompat,
26 util,
26 util,
27 vfs as vfsmod,
27 vfs as vfsmod,
28 )
28 )
29 from .utils import hashutil
29 from .utils import hashutil
30
30
31 parsers = policy.importmod('parsers')
31 parsers = policy.importmod('parsers')
32 # how much bytes should be read from fncache in one read
32 # how much bytes should be read from fncache in one read
33 # It is done to prevent loading large fncache files into memory
33 # It is done to prevent loading large fncache files into memory
34 fncache_chunksize = 10 ** 6
34 fncache_chunksize = 10 ** 6
35
35
36
36
37 def _match_tracked_entry(entry, matcher):
37 def _match_tracked_entry(entry, matcher):
38 """parses a fncache entry and returns whether the entry is tracking a path
38 """parses a fncache entry and returns whether the entry is tracking a path
39 matched by matcher or not.
39 matched by matcher or not.
40
40
41 If matcher is None, returns True"""
41 If matcher is None, returns True"""
42
42
43 if matcher is None:
43 if matcher is None:
44 return True
44 return True
45 if entry.is_filelog:
45 if entry.is_filelog:
46 return matcher(entry.target_id)
46 return matcher(entry.target_id)
47 elif entry.is_manifestlog:
47 elif entry.is_manifestlog:
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50
50
51
51
52 # This avoids a collision between a file named foo and a dir named
52 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
53 # foo.i or foo.d
54 def _encodedir(path):
54 def _encodedir(path):
55 """
55 """
56 >>> _encodedir(b'data/foo.i')
56 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
57 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
58 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
59 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
61 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 """
64 """
65 return (
65 return (
66 path.replace(b".hg/", b".hg.hg/")
66 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
67 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
68 .replace(b".d/", b".d.hg/")
69 )
69 )
70
70
71
71
# prefer the C implementation when the parsers extension provides one
encodedir = getattr(parsers, 'encodedir', _encodedir)
73
73
74
74
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was dir-encoded in the first place
    if b".hg/" not in path:
        return path
    # undo _encodedir, stripping the most specific suffixes first
    decoded = path.replace(b".d.hg/", b".d/")
    decoded = decoded.replace(b".i.hg/", b".i/")
    return decoded.replace(b".hg.hg/", b".hg/")
91
91
92
92
93 def _reserved():
93 def _reserved():
94 """characters that are problematic for filesystems
94 """characters that are problematic for filesystems
95
95
96 * ascii escapes (0..31)
96 * ascii escapes (0..31)
97 * ascii hi (126..255)
97 * ascii hi (126..255)
98 * windows specials
98 * windows specials
99
99
100 these characters will be escaped by encodefunctions
100 these characters will be escaped by encodefunctions
101 """
101 """
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
103 for x in range(32):
104 yield x
104 yield x
105 for x in range(126, 256):
105 for x in range(126, 256):
106 yield x
106 yield x
107 for x in winreserved:
107 for x in winreserved:
108 yield x
108 yield x
109
109
110
110
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # start from an identity map for plain ascii ...
    cmap = {xchr(code): xchr(code) for code in range(127)}
    # ... escape problematic bytes as '~xx' ...
    for code in _reserved():
        cmap[xchr(code)] = b"~%02x" % code
    # ... and case-fold capitals (plus the escape char itself) as '_x'
    for code in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(code)] = escape + xchr(code).lower()

    # reverse mapping, used by the decoder
    dmap = {enc: raw for raw, enc in cmap.items()}

    def decode(s):
        pos = 0
        while pos < len(s):
            # encoded tokens are 1, 2 or 3 bytes long; try shortest first
            for width in range(1, 4):
                try:
                    yield dmap[s[pos : pos + width]]
                    pos += width
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    def encode(s):
        return b''.join([cmap[s[i : i + 1]] for i in range(len(s))])

    return (
        encode,
        lambda s: b''.join(list(decode(s))),
    )
167
167
168
168
# shared module-level encode/decode pair used by the filename helpers below
_encodefname, _decodefname = _buildencodefun()
170
170
171
171
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # dir-encode first so directory/file collisions are resolved before
    # the per-character encoding runs
    return _encodefname(encodedir(s))
178
178
179
179
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # exact inverse of encodefilename: per-character decode, then dir-decode
    return decodedir(_decodefname(s))
186
186
187
187
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity for plain ascii, '~xx' for reserved bytes, lowercase for A-Z
    cmap = {xchr(code): xchr(code) for code in range(127)}
    for code in _reserved():
        cmap[xchr(code)] = b"~%02x" % code
    for code in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(code)] = xchr(code).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
211
211
212
212
# prefer the C implementation when the parsers extension provides one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214
214
215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218
218
219
219
220 def _auxencode(path, dotencode):
220 def _auxencode(path, dotencode):
221 """
221 """
222 Encodes filenames containing names reserved by Windows or which end in
222 Encodes filenames containing names reserved by Windows or which end in
223 period or space. Does not touch other single reserved characters c.
223 period or space. Does not touch other single reserved characters c.
224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 Additionally encodes space or period at the beginning, if dotencode is
225 Additionally encodes space or period at the beginning, if dotencode is
226 True. Parameter path is assumed to be all lowercase.
226 True. Parameter path is assumed to be all lowercase.
227 A segment only needs encoding if a reserved name appears as a
227 A segment only needs encoding if a reserved name appears as a
228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 doesn't need encoding.
229 doesn't need encoding.
230
230
231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 >>> _auxencode(s.split(b'/'), True)
232 >>> _auxencode(s.split(b'/'), True)
233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 >>> _auxencode(s.split(b'/'), False)
235 >>> _auxencode(s.split(b'/'), False)
236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 >>> _auxencode([b'foo. '], True)
237 >>> _auxencode([b'foo. '], True)
238 ['foo.~20']
238 ['foo.~20']
239 >>> _auxencode([b' .foo'], True)
239 >>> _auxencode([b' .foo'], True)
240 ['~20.foo']
240 ['~20.foo']
241 """
241 """
242 for i, n in enumerate(path):
242 for i, n in enumerate(path):
243 if not n:
243 if not n:
244 continue
244 continue
245 if dotencode and n[0] in b'. ':
245 if dotencode and n[0] in b'. ':
246 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 path[i] = n
247 path[i] = n
248 else:
248 else:
249 l = n.find(b'.')
249 l = n.find(b'.')
250 if l == -1:
250 if l == -1:
251 l = len(n)
251 l = len(n)
252 if (l == 3 and n[:3] in _winres3) or (
252 if (l == 3 and n[:3] in _winres3) or (
253 l == 4
253 l == 4
254 and n[3:4] <= b'9'
254 and n[3:4] <= b'9'
255 and n[3:4] >= b'1'
255 and n[3:4] >= b'1'
256 and n[:3] in _winres4
256 and n[:3] in _winres4
257 ):
257 ):
258 # encode third letter ('aux' -> 'au~78')
258 # encode third letter ('aux' -> 'au~78')
259 ec = b"~%02x" % ord(n[2:3])
259 ec = b"~%02x" % ord(n[2:3])
260 n = n[0:2] + ec + n[3:]
260 n = n[0:2] + ec + n[3:]
261 path[i] = n
261 path[i] = n
262 if n[-1] in b'. ':
262 if n[-1] in b'. ':
263 # encode last period or space ('foo...' -> 'foo..~2e')
263 # encode last period or space ('foo...' -> 'foo..~2e')
264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 return path
265 return path
266
266
267
267
268 _maxstorepathlen = 120
268 _maxstorepathlen = 120
269 _dirprefixlen = 8
269 _dirprefixlen = 8
270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271
271
272
272
def _hashencode(path, dotencode):
    """Return the non-reversible, length-limited form of `path`.

    The result is 'dh/' + shortened directory prefixes + as much of the
    basename as fits + sha1 digest of the full path + original extension.
    See _hybridencode for the complete description of the scheme.
    """
    digest = hex(hashutil.sha1(path).digest())
    # skip the 'data/' or 'meta/' prefix before lower-encoding
    segments = lowerencode(path[5:]).split(b'/')
    parts = _auxencode(segments, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    short_dirs = []
    used = 0
    for part in parts[:-1]:
        short = part[:_dirprefixlen]
        if short[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + b'_'
        if used == 0:
            new_len = len(short)
        else:
            new_len = used + 1 + len(short)  # +1 for the '/' separator
        if new_len > _maxshortdirslen:
            break
        short_dirs.append(short)
        used = new_len
    dirs = b'/'.join(short_dirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with as much of the basename as still fits under the limit
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
303
303
304
304
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    encoded = b'/'.join(_auxencode(segments, dotencode))
    if len(encoded) > _maxstorepathlen:
        # too long for the reversible scheme; fall back to hashing
        encoded = _hashencode(path, dotencode)
    return encoded
342
342
343
343
def _pathencode(path):
    """dotencode variant of _hybridencode, with an early length shortcut.

    When the raw path already exceeds _maxstorepathlen the hashed form is
    produced directly, skipping the per-character encoding pass.
    """
    dir_encoded = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    segments = _encodefname(dir_encoded).split(b'/')
    encoded = b'/'.join(_auxencode(segments, True))
    if len(encoded) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return encoded
353
353
354
354
# prefer the C implementation when the parsers extension provides one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
356
356
357
357
def _plainhybridencode(f):
    """_hybridencode with the dotencode pass disabled"""
    return _hybridencode(f, False)
360
360
361
361
def _calcmode(vfs):
    """Return the mode to create files in .hg/ with, or None for the default.

    None means no explicit chmod is needed: either the umask already yields
    the observed directory mode, or the directory could not be stat'ed.
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
372
372
373
373
374 _data = [
374 _data = [
375 b'bookmarks',
375 b'bookmarks',
376 b'narrowspec',
376 b'narrowspec',
377 b'data',
377 b'data',
378 b'meta',
378 b'meta',
379 b'00manifest.d',
379 b'00manifest.d',
380 b'00manifest.i',
380 b'00manifest.i',
381 b'00changelog.d',
381 b'00changelog.d',
382 b'00changelog.i',
382 b'00changelog.i',
383 b'phaseroots',
383 b'phaseroots',
384 b'obsstore',
384 b'obsstore',
385 b'requires',
385 b'requires',
386 ]
386 ]
387
387
REVLOG_FILES_MAIN_EXT = (b'.i',)
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414
414
415
415
def is_revlog(f, kind, st):
    """Return the revlog type flags for filename `f`, or None.

    Anything that is not a regular file cannot be a revlog.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
420
420
421
421
def revlog_type(f):
    """Classify filename `f`, returning FILEFLAGS/FILETYPE bits or None.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
434
434
435
435
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# composite types: one "what data" flag plus one "which file" flag
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
460
460
461
461
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""
467
467
468
468
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    _entry_path = attr.ib()
    _is_volatile = attr.ib(default=False)
    _file_size = attr.ib(default=None)
    # lazily built, then cached, result of files()
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None

    def files(self):
        """return the list of StoreFile for this entry (computed once)"""
        if self._files is None:
            only_file = StoreFile(
                unencoded_path=self._entry_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
            self._files = [only_file]
        return self._files
498
502
499
503
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    revlog_type = attr.ib(default=None)
    target_id = attr.ib(default=None)
    _path_prefix = attr.ib(default=None)
    _details = attr.ib(default=None)
    # lazily built, then cached, result of files()
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # every revlog must at least have its `.i` main file
        assert b'.i' in details, (path_prefix, details)
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        return self.revlog_type & FILEFLAGS_CHANGELOG

    @property
    def is_manifestlog(self):
        return self.revlog_type & FILEFLAGS_MANIFESTLOG

    @property
    def is_filelog(self):
        return self.revlog_type & FILEFLAGS_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self):
        """return one StoreFile per physical file of this revlog (cached)"""
        if self._files is None:
            files = []
            # _ext_key keeps the `.i` file last
            for ext in sorted(self._details, key=_ext_key):
                data = self._details[ext]
                full_path = self._path_prefix + ext
                files.append(StoreFile(unencoded_path=full_path, **data))
            self._files = files
        return self._files

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=self.target_id
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)
563
570
564
571
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return the file's size, stat'ing through `vfs` when not known.

        A file that no longer exists is reported with a size of 0.
        """
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
580
587
581
588
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog prefix
    without extension, the second level is all the file "suffix" that were
    seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = value
    # deterministic ordering for callers iterating the result
    return sorted(grouped.items())
600 return sorted(revlogs.items())
594
601
595
602
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long extensions (nodemap & friends) are introduced by a dash,
    # classical ones (`.i`, `.d`, ...) by a dot
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(sep)
    return filename[:cut], filename[cut:]
610 return filename[:idx], filename[idx:]
604
611
605
612
606 def _ext_key(ext):
613 def _ext_key(ext):
607 """a key to order revlog suffix
614 """a key to order revlog suffix
608
615
609 important to issue .i after other entry."""
616 important to issue .i after other entry."""
610 # the only important part of this order is to keep the `.i` last.
617 # the only important part of this order is to keep the `.i` last.
611 if ext.endswith(b'.n'):
618 if ext.endswith(b'.n'):
612 return (0, ext)
619 return (0, ext)
613 elif ext.endswith(b'.nd'):
620 elif ext.endswith(b'.nd'):
614 return (10, ext)
621 return (10, ext)
615 elif ext.endswith(b'.d'):
622 elif ext.endswith(b'.d'):
616 return (20, ext)
623 return (20, ext)
617 elif ext.endswith(b'.i'):
624 elif ext.endswith(b'.i'):
618 return (50, ext)
625 return (50, ext)
619 else:
626 else:
620 return (40, ext)
627 return (40, ext)
621
628
622
629
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs reads files as stored on disk; vfs applies directory
        # name encoding on top of it
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        # filesystem path for a store-relative (unencoded) name
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            # iterative depth-first traversal of the store directory
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        # NOTE(review): `matcher` is accepted but not applied here;
        # filtering happens in the encodedstore/fncachestore overrides
        dirs = [
            (b'data', FILEFLAGS_FILELOG, False),
            (b'meta', FILEFLAGS_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                file_details = {}
                # drop the leading `data/` or `meta/` component
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # tree manifests: keep the directory, drop the
                    # `00manifest` filename part
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                for ext, (t, s) in sorted(details.items()):
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=file_details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                file_details = {}
                for ext, (t, s) in details.items():
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=file_details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        # plain stores have no extra state to flush on transaction close
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
801
808
802
809
class encodedstore(basicstore):
    """store using simple filename encoding (`store` requirement only)"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # unlike basicstore, full filename encoding is applied here
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        # walk the on-disk (encoded) names, then decode them back
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    # caller asked to collect bad names instead of raising
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            # apply the narrow/tracked-path filtering the base class skips
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
850 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
844
851
845
852
class fncache:
    """in-memory view of the `fncache` file listing store filenames"""

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        # None means "not loaded yet"; loading is lazy (see _load)
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks to avoid loading a huge fncache at once
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks a final newline
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        # full rewrite when entries were removed (dirty); otherwise new
        # entries are simply appended to the existing file
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory copy; next access reloads from disk
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk yet, just forget it
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
976 return iter(self.entries | self.addls)
970
977
971
978
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy recording new store files into the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only revlog files under data/ or meta/ opened for writing need
        # to be registered in the fncache
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
1013 self.fncache.add(path)
1007
1014
1008
1015
class fncachestore(basicstore):
    """store backed by an fncache file (`fncache` requirement)"""

    def __init__(self, path, vfstype, dotencode):
        # dotencode adds the encoding of leading `.`/space characters
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # all writes go through the proxy so new files get registered
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # entries come from the fncache, not a directory walk
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            file_details = {}
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in details.items():
                file_details[ext] = {
                    'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                }
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=file_details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        # flush pending fncache additions/removals within the transaction
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
1124 return False
General Comments 0
You need to be logged in to leave comments. Login now