# Mercurial changeset r51390:66c55696 (branch: default, author: marmoute)
# "store: introduce boolean property for revlog type"
# Diff context: @@ -1,1067 +1,1079 (mercurial/store.py)
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how much bytes should be read from fncache in one read
31 # how much bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
def _match_tracked_entry(entry, matcher):
    """Return whether a fncache entry tracks a path selected by `matcher`.

    A `matcher` of None selects everything and always yields True.
    Raises ProgrammingError for entries of an unexpected revlog type.
    """
    if matcher is None:
        return True
    rl_type = entry.revlog_type
    if rl_type == FILEFLAGS_FILELOG:
        # filelogs track a single file path
        return matcher(entry.target_id)
    if rl_type == FILEFLAGS_MANIFESTLOG:
        # (tree) manifest logs track a directory
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    raise error.ProgrammingError(b"cannot process entry %r" % entry)
49
49
50
50
51 # This avoids a collision between a file named foo and a dir named
51 # This avoids a collision between a file named foo and a dir named
52 # foo.i or foo.d
52 # foo.i or foo.d
53 def _encodedir(path):
53 def _encodedir(path):
54 """
54 """
55 >>> _encodedir(b'data/foo.i')
55 >>> _encodedir(b'data/foo.i')
56 'data/foo.i'
56 'data/foo.i'
57 >>> _encodedir(b'data/foo.i/bla.i')
57 >>> _encodedir(b'data/foo.i/bla.i')
58 'data/foo.i.hg/bla.i'
58 'data/foo.i.hg/bla.i'
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 'data/foo.i.hg.hg/bla.i'
60 'data/foo.i.hg.hg/bla.i'
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 """
63 """
64 return (
64 return (
65 path.replace(b".hg/", b".hg.hg/")
65 path.replace(b".hg/", b".hg.hg/")
66 .replace(b".i/", b".i.hg/")
66 .replace(b".i/", b".i.hg/")
67 .replace(b".d/", b".d.hg/")
67 .replace(b".d/", b".d.hg/")
68 )
68 )
69
69
70
70
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72
72
73
73
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing in this path was dir-encoded
    if b".hg/" not in path:
        return path
    # undo _encodedir; replacement order mirrors the encoder
    for old, new in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(old, new)
    return path
90
90
91
91
92 def _reserved():
92 def _reserved():
93 """characters that are problematic for filesystems
93 """characters that are problematic for filesystems
94
94
95 * ascii escapes (0..31)
95 * ascii escapes (0..31)
96 * ascii hi (126..255)
96 * ascii hi (126..255)
97 * windows specials
97 * windows specials
98
98
99 these characters will be escaped by encodefunctions
99 these characters will be escaped by encodefunctions
100 """
100 """
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 for x in range(32):
102 for x in range(32):
103 yield x
103 yield x
104 for x in range(126, 256):
104 for x in range(126, 256):
105 yield x
105 yield x
106 for x in winreserved:
106 for x in winreserved:
107 yield x
107 yield x
108
108
109
109
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    xchr = pycompat.bytechr
    # start with an identity mapping over plain ascii
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # reserved bytes become "~XX" escapes
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # upper-case letters (and the escape char itself) become "_<lower>"
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[xchr(x)] = escape + xchr(x).lower()

    # reverse mapping for decoding
    dmap = {v: k for k, v in cmap.items()}

    def decode(s):
        # greedily match 1- to 3-byte encoded sequences
        i = 0
        end = len(s)
        while i < end:
            for width in range(1, 4):
                try:
                    yield dmap[s[i : i + width]]
                    i += width
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    def encode(s):
        return b''.join([cmap[s[c : c + 1]] for c in range(len(s))])

    return (encode, lambda s: b''.join(list(decode(s))))
166
166
167
167
168 _encodefname, _decodefname = _buildencodefun()
168 _encodefname, _decodefname = _buildencodefun()
169
169
170
170
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # dir-encode first, then apply the reversible character encoding
    return _encodefname(encodedir(s))
177
177
178
178
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # exact inverse of encodefilename: undo chars first, then dirs
    return decodedir(_decodefname(s))
185
185
186
186
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity over plain ascii ...
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # ... reserved bytes become "~XX" ...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ... and upper-case letters fold to lower-case (not reversible)
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
210
210
211
211
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213
213
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217
217
218
218
219 def _auxencode(path, dotencode):
219 def _auxencode(path, dotencode):
220 """
220 """
221 Encodes filenames containing names reserved by Windows or which end in
221 Encodes filenames containing names reserved by Windows or which end in
222 period or space. Does not touch other single reserved characters c.
222 period or space. Does not touch other single reserved characters c.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 Additionally encodes space or period at the beginning, if dotencode is
224 Additionally encodes space or period at the beginning, if dotencode is
225 True. Parameter path is assumed to be all lowercase.
225 True. Parameter path is assumed to be all lowercase.
226 A segment only needs encoding if a reserved name appears as a
226 A segment only needs encoding if a reserved name appears as a
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 doesn't need encoding.
228 doesn't need encoding.
229
229
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 >>> _auxencode(s.split(b'/'), True)
231 >>> _auxencode(s.split(b'/'), True)
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 >>> _auxencode(s.split(b'/'), False)
234 >>> _auxencode(s.split(b'/'), False)
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 >>> _auxencode([b'foo. '], True)
236 >>> _auxencode([b'foo. '], True)
237 ['foo.~20']
237 ['foo.~20']
238 >>> _auxencode([b' .foo'], True)
238 >>> _auxencode([b' .foo'], True)
239 ['~20.foo']
239 ['~20.foo']
240 """
240 """
241 for i, n in enumerate(path):
241 for i, n in enumerate(path):
242 if not n:
242 if not n:
243 continue
243 continue
244 if dotencode and n[0] in b'. ':
244 if dotencode and n[0] in b'. ':
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 path[i] = n
246 path[i] = n
247 else:
247 else:
248 l = n.find(b'.')
248 l = n.find(b'.')
249 if l == -1:
249 if l == -1:
250 l = len(n)
250 l = len(n)
251 if (l == 3 and n[:3] in _winres3) or (
251 if (l == 3 and n[:3] in _winres3) or (
252 l == 4
252 l == 4
253 and n[3:4] <= b'9'
253 and n[3:4] <= b'9'
254 and n[3:4] >= b'1'
254 and n[3:4] >= b'1'
255 and n[:3] in _winres4
255 and n[:3] in _winres4
256 ):
256 ):
257 # encode third letter ('aux' -> 'au~78')
257 # encode third letter ('aux' -> 'au~78')
258 ec = b"~%02x" % ord(n[2:3])
258 ec = b"~%02x" % ord(n[2:3])
259 n = n[0:2] + ec + n[3:]
259 n = n[0:2] + ec + n[3:]
260 path[i] = n
260 path[i] = n
261 if n[-1] in b'. ':
261 if n[-1] in b'. ':
262 # encode last period or space ('foo...' -> 'foo..~2e')
262 # encode last period or space ('foo...' -> 'foo..~2e')
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 return path
264 return path
265
265
266
266
# hard limit on the length of an encoded store path
_maxstorepathlen = 120
# how many leading characters of each directory level survive hashing
_dirprefixlen = 8
# total budget for the shortened directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


def _hashencode(path, dotencode):
    """Non-reversible, length-bounded encoding of `path`.

    Produces 'dh/<short dirs>/<basename filler><sha1 hex><ext>' capped at
    _maxstorepathlen characters (see _hybridencode for the full scheme).
    """
    digest = hex(hashutil.sha1(path).digest())
    # skip prefix 'data/' or 'meta/' before lower-encoding
    segments = lowerencode(path[5:]).split(b'/')
    parts = _auxencode(segments, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        # include the b'/' separator in the running length
        total = len(d) if sdirslen == 0 else sdirslen + 1 + len(d)
        if total > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = total
    dirs = b'/'.join(sdirs)
    if dirs:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # use whatever room is left for a readable basename filler
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        res = b'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
302
302
303
303
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(segments, dotencode))
    if len(res) > _maxstorepathlen:
        # too long: fall back to the non-reversible hashed form
        return _hashencode(path, dotencode)
    return res
341
341
342
342
def _pathencode(path):
    """Pure-python store path encoder (dotencode variant).

    Same scheme as _hybridencode with dotencode enabled; falls back to the
    hashed encoding whenever either the raw or encoded form exceeds
    _maxstorepathlen.
    """
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    segments = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(segments, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
352
352
353
353
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355
355
356
356
def _plainhybridencode(f):
    """hybrid-encode `f` without dot-encoding"""
    return _hybridencode(f, False)
359
359
360
360
def _calcmode(vfs):
    """Return the mode new files in `vfs` should be created with.

    Returns None when the store directory is unreadable or when the mode
    already matches what the umask would produce (avoiding useless chmods).
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
371
371
372
372
373 _data = [
373 _data = [
374 b'bookmarks',
374 b'bookmarks',
375 b'narrowspec',
375 b'narrowspec',
376 b'data',
376 b'data',
377 b'meta',
377 b'meta',
378 b'00manifest.d',
378 b'00manifest.d',
379 b'00manifest.i',
379 b'00manifest.i',
380 b'00changelog.d',
380 b'00changelog.d',
381 b'00changelog.i',
381 b'00changelog.i',
382 b'phaseroots',
382 b'phaseroots',
383 b'obsstore',
383 b'obsstore',
384 b'requires',
384 b'requires',
385 ]
385 ]
386
386
# extension of the main (entry point) revlog file
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the secondary revlog files
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413
413
414
414
def is_revlog(f, kind, st):
    """Return the revlog type of regular file `f`, or None for non-files."""
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
419
419
420
420
def revlog_type(f):
    """Classify filename `f` as a main or secondary revlog file.

    Returns FILEFLAGS_REVLOG_MAIN for `.i` files, a FILETYPE_FILELOG_OTHER
    based flag set (possibly with FILEFLAGS_VOLATILE) for secondary
    extensions, and None for anything else.
    """
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
433
433
434
434
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# combined category | role masks used throughout the store code
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
459
459
460
460
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """Base class for entries in the store.

    Instances are returned by `store.walk` and represent some data in the
    store."""
466
466
467
467
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic (non-revlog) entry in the store"""

    is_revlog = False

    # store-relative, unencoded path of the entry
    _entry_path = attr.ib()
    # True if the file may change/disappear between listing and streaming
    _is_volatile = attr.ib(default=False)
    # size in bytes when already known, None otherwise
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the single StoreFile backing this entry"""
        only_file = StoreFile(
            unencoded_path=self._entry_path,
            file_size=self._file_size,
            is_volatile=self._is_volatile,
        )
        return [only_file]
497
497
498
498
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # one of the FILEFLAGS_CHANGELOG/MANIFESTLOG/FILELOG categories
    revlog_type = attr.ib(default=None)
    # identifier of the tracked entity: a file path for filelogs, a
    # directory for (tree) manifest logs (see _match_tracked_entry)
    target_id = attr.ib(default=None)
    # store-relative path of the revlog files, without extension
    _path_prefix = attr.ib(default=None)
    # mapping of file extension -> per-file data (see files())
    _details = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # every revlog must at least have its `.i` main file
        assert b'.i' in details, (path_prefix, details)
        self._details = details

    @property
    def is_changelog(self):
        """True if this entry is part of the changelog data"""
        # bool() so the property is an actual boolean, not the raw flag
        # mask that `&` would return
        return bool(self.revlog_type & FILEFLAGS_CHANGELOG)

    @property
    def is_manifestlog(self):
        """True if this entry is part of the manifest data"""
        return bool(self.revlog_type & FILEFLAGS_MANIFESTLOG)

    @property
    def is_filelog(self):
        """True if this entry is part of some file's data"""
        return bool(self.revlog_type & FILEFLAGS_FILELOG)

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self):
        """all StoreFile for this revlog; `.i` issued last (see _ext_key)"""
        files = []
        for ext in sorted(self._details, key=_ext_key):
            path = self._path_prefix + ext
            data = self._details[ext]
            files.append(StoreFile(unencoded_path=path, **data))
        return files
535
547
536
548
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # store-relative, unencoded path of the file
    unencoded_path = attr.ib()
    # size in bytes when known up-front, None to stat on demand
    _file_size = attr.ib(default=None)
    # True if the file may change/disappear between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return the file's size, stat-ing through `vfs` when unknown"""
        if self._file_size is None:
            try:
                return vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                # e.g. a volatile file deleted since it was listed
                return 0
        return self._file_size
552
564
553
565
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog
    prefix without extension, the second level is all the file "suffix" that
    were seen for this revlog and arbitrary file data as value.
    """
    revlogs = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, ext = _split_revlog_ext(unencoded)
        revlogs[prefix][ext] = value
    # sorted for deterministic iteration order
    return sorted(revlogs.items())
566
578
567
579
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions use b'-' before the variable suffix, plain
    # revlog files use b'.'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
576
588
577
589
578 def _ext_key(ext):
590 def _ext_key(ext):
579 """a key to order revlog suffix
591 """a key to order revlog suffix
580
592
581 important to issue .i after other entry."""
593 important to issue .i after other entry."""
582 # the only important part of this order is to keep the `.i` last.
594 # the only important part of this order is to keep the `.i` last.
583 if ext.endswith(b'.n'):
595 if ext.endswith(b'.n'):
584 return (0, ext)
596 return (0, ext)
585 elif ext.endswith(b'.nd'):
597 elif ext.endswith(b'.nd'):
586 return (10, ext)
598 return (10, ext)
587 elif ext.endswith(b'.d'):
599 elif ext.endswith(b'.d'):
588 return (20, ext)
600 return (20, ext)
589 elif ext.endswith(b'.i'):
601 elif ext.endswith(b'.i'):
590 return (50, ext)
602 return (50, ext)
591 else:
603 else:
592 return (40, ext)
604 return (40, ext)
593
605
594
606
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        # `rawvfs` accesses the backing filesystem verbatim; `vfs` applies
        # directory-name encoding (encodedir) on top of it.
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """Return the store-absolute path of ``f``, directory-encoded."""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''walk ``relpath`` and collect revlog files

        Returns a sorted list of ``(unencoded_name, (revlog_type, size))``
        pairs.  ``undecodable`` is unused here; it exists so subclasses
        (see encodedstore) can report names they fail to decode.
        '''
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the store prefix to strip from absolute paths
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            # iterative depth-first traversal of the directory tree
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        # strip the store prefix and normalize separators
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """Instantiate the changelog stored in this store."""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """Instantiate the manifest log backed by this store."""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        NOTE(review): ``matcher`` is accepted but not applied here;
        filtering happens in subclasses — confirm intended for this base.
        """
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            # drop entries whose revlog type could not be determined
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                file_details = {}
                # target_id is the path below the `data/` or `meta/` prefix
                revlog_target_id = revlog.split(b'/', 1)[1]
                for ext, (t, s) in sorted(details.items()):
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=file_details,
                )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """Yield store entries for top-level files.

        Non-revlog files are yielded as SimpleStoreEntry; the manifest
        revlog entry is yielded before the changelog one.
        """
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        # a store holds at most one changelog and one root manifest
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                file_details = {}
                for ext, (t, s) in details.items():
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=file_details,
                )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields BaseStoreEntry objects: data/meta entries first, then the
        top-level ones (see topfiles).

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        """Return the store paths relevant when copying the repository."""
        return _data

    def write(self, tr):
        # no pending state to persist in the base store
        pass

    def invalidatecaches(self):
        # no caches in the base store
        pass

    def markremoved(self, fn):
        # nothing to track in the base store
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
753
765
754
766
class encodedstore(basicstore):
    """A store whose filenames are encoded with ``encodefilename``."""

    def __init__(self, path, vfstype):
        raw = vfstype(path + b'/store')
        self.path = raw.base
        self.createmode = _calcmode(raw)
        raw.createmode = self.createmode
        self.rawvfs = raw
        self.vfs = vfsmod.filtervfs(raw, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        decoded = []
        for encoded_name, value in super()._walk(relpath, recurse):
            try:
                plain = decodefilename(encoded_name)
            except KeyError:
                # the on-disk name is not a valid encoding product
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % encoded_name
                    raise error.StorageError(msg)
                undecodable.append(encoded_name)
            else:
                decoded.append((plain, value))
        return decoded

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        for entry in super().datafiles(undecodable=undecodable):
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        return b'/'.join((self.path, encodefilename(f)))

    def copylist(self):
        store_files = [b'store/' + f for f in _data]
        return [b'requires', b'00changelog.i'] + store_files
794
806
795
807
class fncache:
    """In-memory view of the ``fncache`` file listing store file names.

    ``entries`` is lazily loaded (``None`` until first needed) and
    ``addls`` accumulates names added since the last load/write so that
    a pure addition does not force reading the whole file.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names for which add() is a no-op
        self._ignores = set()
        # set of known names, or None while not loaded from disk
        self.entries = None
        # True when entries were removed and the file must be rewritten
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks (fncache_chunksize) so a large fncache is
        # never slurped into memory at once
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                # consume all complete lines, keep the partial tail
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacked a final newline
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # rescan the file to report the 1-based line of each empty entry
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Persist pending changes through transaction ``tr``.

        Removals (``_dirty``) force a full rewrite; pure additions are
        appended to the existing file instead.
        """
        if self._dirty:
            assert self.entries is not None
            # fold pending additions in before rewriting everything
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory set so the appended file is re-read on
            # next access
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Mark ``fn`` so that later add() calls ignore it."""
        self._ignores.add(fn)

    def add(self, fn):
        """Record ``fn`` as new, unless ignored or already known."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``; marks the cache dirty when it came from disk."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written out: no rewrite needed
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        # iterate over both persisted and pending names
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
920
932
921
933
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records data/meta revlog writes into the fncache."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and path.startswith((b'data/', b'meta/'))
            and revlog_type(path) is not None
        ):
            # A file already present on disk while the fncache is unloaded
            # is assumed registered; loading the cache just to re-add it
            # would be wasteful.
            skip_add = self.fncache.entries is None and self.vfs.exists(
                encoded
            )
            if skip_add and b'r+' in mode:
                # An append target of size zero is what an aborted
                # transaction's truncation leaves behind: treat it as
                # missing and register it anyway.
                if not self.vfs.stat(encoded).st_size:
                    skip_add = False
            if not skip_add:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
957
969
958
970
class fncachestore(basicstore):
    """Store that tracks its file names in the ``fncache`` file and
    encodes paths with a hybrid encoding scheme."""

    def __init__(self, path, vfstype, dotencode):
        # pick the filename encoder; both are defined earlier in this
        # module (dotencode selects the `_pathencode` variant)
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # writes through this vfs register new files into the fncache
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the store-absolute, encoded path for ``f``."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the on-disk size of the (already encoded) ``path``."""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield a RevlogStoreEntry per revlog listed in the fncache,
        restricted to entries matching ``matcher``."""
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            file_details = {}
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in details.items():
                # NOTE: unlike basicstore.datafiles, no 'file_size' is
                # recorded here; the fncache does not know sizes
                file_details[ext] = {
                    'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                }
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=file_details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        """Return the store paths relevant when copying the repository."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        # flush pending fncache additions/removals within the transaction
        self.fncache.write(tr)

    def invalidatecaches(self):
        # force a reload of the fncache on next access
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        """Return True when the encoded form of ``f`` exists on disk."""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now