##// END OF EJS Templates
store: properly compute the target_id of manifestlog in no-fncache walk...
marmoute -
r51522:309cbd84 default
parent child Browse files
Show More
@@ -1,1114 +1,1117 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 filelog,
22 filelog,
23 manifest,
23 manifest,
24 policy,
24 policy,
25 pycompat,
25 pycompat,
26 util,
26 util,
27 vfs as vfsmod,
27 vfs as vfsmod,
28 )
28 )
29 from .utils import hashutil
29 from .utils import hashutil
30
30
# C implementations of the encoding helpers, when the extension is available
# (each use site falls back to the pure-Python version via getattr)
parsers = policy.importmod('parsers')
# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
35
35
36
36
37 def _match_tracked_entry(entry, matcher):
37 def _match_tracked_entry(entry, matcher):
38 """parses a fncache entry and returns whether the entry is tracking a path
38 """parses a fncache entry and returns whether the entry is tracking a path
39 matched by matcher or not.
39 matched by matcher or not.
40
40
41 If matcher is None, returns True"""
41 If matcher is None, returns True"""
42
42
43 if matcher is None:
43 if matcher is None:
44 return True
44 return True
45 if entry.is_filelog:
45 if entry.is_filelog:
46 return matcher(entry.target_id)
46 return matcher(entry.target_id)
47 elif entry.is_manifestlog:
47 elif entry.is_manifestlog:
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50
50
51
51
52 # This avoids a collision between a file named foo and a dir named
52 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
53 # foo.i or foo.d
54 def _encodedir(path):
54 def _encodedir(path):
55 """
55 """
56 >>> _encodedir(b'data/foo.i')
56 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
57 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
58 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
59 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
61 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 """
64 """
65 return (
65 return (
66 path.replace(b".hg/", b".hg.hg/")
66 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
67 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
68 .replace(b".d/", b".d.hg/")
69 )
69 )
70
70
71
71
# prefer the C implementation of encodedir when available
encodedir = getattr(parsers, 'encodedir', _encodedir)
73
73
74
74
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        # fast path: nothing was escaped by encodedir
        return path
    # undo the escapes in the reverse order of _encodedir
    decoded = path.replace(b".d.hg/", b".d/")
    decoded = decoded.replace(b".i.hg/", b".i/")
    decoded = decoded.replace(b".hg.hg/", b".hg/")
    return decoded
91
91
92
92
93 def _reserved():
93 def _reserved():
94 """characters that are problematic for filesystems
94 """characters that are problematic for filesystems
95
95
96 * ascii escapes (0..31)
96 * ascii escapes (0..31)
97 * ascii hi (126..255)
97 * ascii hi (126..255)
98 * windows specials
98 * windows specials
99
99
100 these characters will be escaped by encodefunctions
100 these characters will be escaped by encodefunctions
101 """
101 """
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
103 for x in range(32):
104 yield x
104 yield x
105 for x in range(126, 256):
105 for x in range(126, 256):
106 yield x
106 yield x
107 for x in winreserved:
107 for x in winreserved:
108 yield x
108 yield x
109
109
110
110
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # plain ascii maps to itself...
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # ...except reserved bytes, which become ~XX escapes...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ...and capitals (plus the escape char itself): '_' + lowercase
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(x)] = escape + xchr(x).lower()

    # reverse mapping used for decoding
    dmap = {v: k for k, v in cmap.items()}

    def encode(s):
        return b''.join([cmap[s[c : c + 1]] for c in range(len(s))])

    def decode_atoms(s):
        i = 0
        while i < len(s):
            # encoded atoms are 1 to 3 bytes long
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    def decode(s):
        return b''.join(list(decode_atoms(s)))

    return (encode, decode)
167
167
168
168
# module-level encode/decode pair used by encodefilename/decodefilename
_encodefname, _decodefname = _buildencodefun()
170
170
171
171
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # escape directory-collision suffixes first, then individual bytes
    dir_encoded = encodedir(s)
    return _encodefname(dir_encoded)
178
178
179
179
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo the byte-level escapes first, then the directory escapes
    name_decoded = _decodefname(s)
    return decodedir(name_decoded)
186
186
187
187
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {}
    # identity for plain ascii...
    for x in range(127):
        cmap[xchr(x)] = xchr(x)
    # ...reserved bytes become ~XX escapes...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ...and uppercase simply folds to lowercase (hence not reversible)
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
211
211
212
212
# prefer the C implementation of lowerencode when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
218
218
219
219
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # note: `path` is a list of segments, mutated in place and returned
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # escape a leading period or space as ~XX
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # `l` is the length of the basename up to the first period
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
266
266
267
267
# maximum length of an encoded store path before hashed encoding kicks in
_maxstorepathlen = 120
# number of characters kept from each directory level in hashed encoding
_dirprefixlen = 8
# overall budget for the shortened directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271
271
272
272
def _hashencode(path, dotencode):
    """non-reversible hashed encoding for over-long store paths

    The result is b'dh/' + shortened directory prefixes + as much of the
    basename as fits + the sha1 hex digest of the full path + the original
    extension (see _hybridencode's docstring for the complete description).
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        # keep at most _dirprefixlen characters of each directory level
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            # +1 accounts for the b'/' separator
            t = sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            # stop adding levels once the directory budget is exhausted
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # fill remaining space with the beginning of the basename
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
303
303
304
304
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    encoded = b'/'.join(_auxencode(segments, dotencode))
    if len(encoded) > _maxstorepathlen:
        # too long for the reversible form: fall back to hashed encoding
        return _hashencode(path, dotencode)
    return encoded
342
342
343
343
def _pathencode(path):
    """encode a store path, always dot-encoding leading '.'/' '

    Falls back to the hashed encoding when either the raw path or its
    default-encoded form exceeds _maxstorepathlen.
    """
    dir_encoded = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    segments = _encodefname(dir_encoded).split(b'/')
    encoded = b'/'.join(_auxencode(segments, True))
    if len(encoded) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return encoded
353
353
354
354
# prefer the C implementation of pathencode when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
356
356
357
357
def _plainhybridencode(f):
    """hybrid encoding without dot-encoding of leading periods/spaces"""
    return _hybridencode(f, False)
360
360
361
361
def _calcmode(vfs):
    """Return the creation mode for files in .hg/, or None to skip chmods."""
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
372
372
373
373
# names of files and directories that may appear at the root of the store
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]
387
387
# extension of the main ("index") file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the other files that may back a revlog
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414
414
415
415
def is_revlog(f, kind, st):
    """Return the revlog type of path ``f``, or None for non-revlog files.

    Only regular files can be revlogs; ``st`` is currently unused.
    """
    return revlog_type(f) if kind == stat.S_IFREG else None
420
420
421
421
def revlog_type(f):
    """Classify filename ``f`` as a revlog main/other file (or None)."""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if not f.endswith(REVLOG_FILES_OTHER_EXT):
        return None
    t = FILETYPE_FILELOG_OTHER
    if f.endswith(REVLOG_FILES_VOLATILE_EXT):
        t |= FILEFLAGS_VOLATILE
    return t
434
434
435
435
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# combined flags: revlog kind | role of this particular file
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
460
460
461
461
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store.

    Subclasses (SimpleStoreEntry, RevlogStoreEntry) provide a `files()`
    method listing the concrete store files backing the entry."""
467
467
468
468
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    # unencoded path of the file within the store
    _entry_path = attr.ib()
    # might the file change between listing and streaming?
    _is_volatile = attr.ib(default=False)
    # size of the file if already known, None to stat() it lazily
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the single StoreFile backing this entry"""
        return [
            StoreFile(
                unencoded_path=self._entry_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
498
498
499
499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # FILEFLAGS_*/FILETYPE_* bit field describing the kind of revlog
    revlog_type = attr.ib(default=None)
    # what the revlog is about: a file path for filelogs, a tree directory
    # for manifestlogs (see get_revlog_instance)
    target_id = attr.ib(default=None)
    # unencoded store path of the revlog, without any extension
    _path_prefix = attr.ib(default=None)
    # mapping of file extension -> StoreFile keyword arguments (see files())
    _details = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # a revlog always has at least its `.i` index file
        assert b'.i' in details, (path_prefix, details)
        self._details = details

    @property
    def is_changelog(self):
        # truthy when this entry belongs to the changelog
        return self.revlog_type & FILEFLAGS_CHANGELOG

    @property
    def is_manifestlog(self):
        # truthy when this entry belongs to a manifest revlog
        return self.revlog_type & FILEFLAGS_MANIFESTLOG

    @property
    def is_filelog(self):
        # truthy when this entry belongs to a filelog
        return self.revlog_type & FILEFLAGS_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self):
        """return a StoreFile for each file backing this revlog

        Files are ordered so that the `.i` index comes last (see _ext_key)."""
        files = []
        for ext in sorted(self._details, key=_ext_key):
            path = self._path_prefix + ext
            data = self._details[ext]
            files.append(StoreFile(unencoded_path=path, **data))
        return files

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            # target_id is the directory of the tree manifest
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            # target_id is the tracked file's path
            return filelog.filelog(repo.svfs, self.target_id)
563
563
564
564
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # unencoded path of the file within the store
    unencoded_path = attr.ib()
    # size if already known, otherwise stat()ed on demand in file_size()
    _file_size = attr.ib(default=None)
    # might the file change between listing and streaming?
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return the file's size, using ``vfs`` to stat it when unknown

        A file that has vanished since listing is reported as size 0
        rather than raising."""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
580
580
581
581
def _gather_revlog(files_data):
    """group files per revlog prefix

    Returns a sorted list of (prefix, mapping) pairs. The prefix is the
    revlog path without its extension; the mapping associates every file
    "suffix" seen for that revlog with the caller-provided file data.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, ext = _split_revlog_ext(unencoded)
        grouped[prefix][ext] = value
    return sorted(grouped.items())
594
594
595
595
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-form extensions are dash-separated; classic ones use a dot
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(sep)
    return filename[:cut], filename[cut:]
604
604
605
605
606 def _ext_key(ext):
606 def _ext_key(ext):
607 """a key to order revlog suffix
607 """a key to order revlog suffix
608
608
609 important to issue .i after other entry."""
609 important to issue .i after other entry."""
610 # the only important part of this order is to keep the `.i` last.
610 # the only important part of this order is to keep the `.i` last.
611 if ext.endswith(b'.n'):
611 if ext.endswith(b'.n'):
612 return (0, ext)
612 return (0, ext)
613 elif ext.endswith(b'.nd'):
613 elif ext.endswith(b'.nd'):
614 return (10, ext)
614 return (10, ext)
615 elif ext.endswith(b'.d'):
615 elif ext.endswith(b'.d'):
616 return (20, ext)
616 return (20, ext)
617 elif ext.endswith(b'.i'):
617 elif ext.endswith(b'.i'):
618 return (50, ext)
618 return (50, ext)
619 else:
619 else:
620 return (40, ext)
620 return (40, ext)
621
621
622
622
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # raw access, no filename encoding applied
        self.rawvfs = vfs
        # access through directory-encoding (but no filename encoding)
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        # absolute path of `f` inside the store, directory-encoded
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the prefix to strip to get store-relative names
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            # iterative depth-first traversal of the directory tree
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        # build the changelog object backed by this store's vfs
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        # (directory, revlog type, strip last path component for target_id)
        dirs = [
            (b'data', FILEFLAGS_FILELOG, False),
            (b'meta', FILEFLAGS_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                file_details = {}
                # drop the leading `data/` or `meta/` directory
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # manifestlog target_id is the tree directory (with a
                    # trailing slash), not the `00manifest` filename itself
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                for ext, (t, s) in sorted(details.items()):
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=file_details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        # yield store-level entries: optional phase/obsstore files, then
        # any plain top-level files, then the manifest and changelog revlogs
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                file_details = {}
                for ext, (t, s) in details.items():
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    # top-level revlogs have an empty target_id
                    target_id=b'',
                    details=file_details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        # list of files to copy when cloning/sharing this store layout
        return _data

    def write(self, tr):
        # no store-level state to persist in the basic layout
        pass

    def invalidatecaches(self):
        # no caches in the basic layout
        pass

    def markremoved(self, fn):
        # nothing to track for removals in the basic layout
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
798
801
799
802
class encodedstore(basicstore):
    """store variant that hex-encodes filenames on disk"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # all paths go through full filename encoding
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        # walk the raw (encoded) tree, then decode each name back
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    # caller opted in to collect bad names instead of raising
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # filter the base-class entries through the tracked-path matcher
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
841
844
842
845
class fncache:
    """in-memory view of the `fncache` file listing store filenames"""

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names that must never be recorded in the fncache
        self._ignores = set()
        # lazily-loaded set of known entries; None until _load() runs
        self.entries = None
        # True when entries were removed and a full rewrite is needed
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks so a huge fncache never sits fully in memory
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks a final newline: corruption
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the exact offending line number
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        # persist pending changes within transaction `tr`
        if self._dirty:
            # removals happened: merge additions and rewrite the whole file
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory set so the next access reloads from disk
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        # mark `fn` so that later add() calls for it are no-ops
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written out yet, dropping the pending addition suffices
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        # iterate over both persisted entries and pending additions
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
967
970
968
971
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records newly created revlog files in the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        # filename-encoding function applied before hitting the real vfs
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only writes to revlog files under data/ or meta/ need recording
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            # empty path: delegate without encoding
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
1004
1007
1005
1008
class fncachestore(basicstore):
    """store variant using hashed filename encoding plus an fncache index"""

    def __init__(self, path, vfstype, dotencode):
        # choose the filename-encoding flavor
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # wrap the vfs so new revlog files are auto-registered in fncache
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        # enumerate revlogs from the fncache instead of walking the disk
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            file_details = {}
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in details.items():
                file_details[ext] = {
                    'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                }
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=file_details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        # files and directories copied when cloning this store layout
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        # force the fncache to reload from disk on next access
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        # stat-based existence check on the encoded filename
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now