##// END OF EJS Templates
store: add logic to group revlog file together...
marmoute -
r51372:5217e363 default
parent child Browse files
Show More
@@ -1,971 +1,1004
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
# `parsers` may provide accelerated implementations of some functions in this
# module (see the `getattr(parsers, ...)` fallbacks below)
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34
34
35
35
def _matchtrackedpath(path, matcher):
    """Tell whether a fncache entry refers to content matched by `matcher`.

    The entry is decoded before being inspected.  When `matcher` is None
    everything is considered matched and True is returned.
    """

    if matcher is None:
        return True
    decoded = decodedir(path)
    data_prefix = b'data/'
    meta_prefix = b'meta/'
    if decoded.startswith(data_prefix):
        # strip the "data/" prefix and the ".i" suffix to get the tracked path
        return matcher(decoded[len(data_prefix) : -len(b'.i')])
    if decoded.startswith(meta_prefix):
        # manifest entries are matched at the directory level
        return matcher.visitdir(
            decoded[len(meta_prefix) : -len(b'/00manifest.i')]
        )

    raise error.ProgrammingError(b"cannot decode path %s" % decoded)
51
51
52
52
53 # This avoids a collision between a file named foo and a dir named
53 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
54 # foo.i or foo.d
55 def _encodedir(path):
55 def _encodedir(path):
56 """
56 """
57 >>> _encodedir(b'data/foo.i')
57 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
58 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
59 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
60 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
62 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
65 """
66 return (
66 return (
67 path.replace(b".hg/", b".hg.hg/")
67 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
68 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
69 .replace(b".d/", b".d.hg/")
70 )
70 )
71
71
72
72
# prefer the implementation from `parsers` when it provides one
encodedir = getattr(parsers, 'encodedir', _encodedir)
74
74
75
75
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        # fast path: no directory component was escaped by encodedir
        return path
    # undo the escapes in the reverse order they were applied
    decoded = path
    for escaped, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        decoded = decoded.replace(escaped, plain)
    return decoded
92
92
93
93
94 def _reserved():
94 def _reserved():
95 """characters that are problematic for filesystems
95 """characters that are problematic for filesystems
96
96
97 * ascii escapes (0..31)
97 * ascii escapes (0..31)
98 * ascii hi (126..255)
98 * ascii hi (126..255)
99 * windows specials
99 * windows specials
100
100
101 these characters will be escaped by encodefunctions
101 these characters will be escaped by encodefunctions
102 """
102 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
104 for x in range(32):
105 yield x
105 yield x
106 for x in range(126, 256):
106 for x in range(126, 256):
107 yield x
107 yield x
108 for x in winreserved:
108 for x in winreserved:
109 yield x
109 yield x
110
110
111
111
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    bytechr = pycompat.bytechr
    # start from the identity mapping for plain ascii bytes
    cmap = {bytechr(x): bytechr(x) for x in range(127)}
    # problematic bytes become "~" followed by two lowercase hex digits
    for x in _reserved():
        cmap[bytechr(x)] = b"~%02x" % x
    # uppercase letters and the escape byte itself become "_" + lowercase
    for x in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        cmap[bytechr(x)] = escape + bytechr(x).lower()

    # inverse mapping, used by the decoder
    dmap = {v: k for k, v in cmap.items()}

    def decode(s):
        # greedily consume 1, 2 or 3 byte escape sequences
        pos = 0
        end = len(s)
        while pos < end:
            for width in range(1, 4):
                candidate = s[pos : pos + width]
                if candidate in dmap:
                    yield dmap[candidate]
                    pos += width
                    break
            else:
                raise KeyError

    def encode(s):
        return b''.join([cmap[s[i : i + 1]] for i in range(len(s))])

    return (encode, lambda s: b''.join(list(decode(s))))
168
168
169
169
# module-level encode/decode pair built once at import time
_encodefname, _decodefname = _buildencodefun()
171
171
172
172
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # directory-suffix escaping must happen before byte-level escaping
    dir_encoded = encodedir(s)
    return _encodefname(dir_encoded)
179
179
180
180
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo byte-level escaping first, then directory-suffix escaping
    name_decoded = _decodefname(s)
    return decodedir(name_decoded)
187
187
188
188
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    bytechr = pycompat.bytechr
    # identity for plain ascii, "~XX" for problematic bytes, and lowercase
    # for uppercase letters -- no "_" escape, hence not reversible
    cmap = {}
    for x in range(127):
        cmap[bytechr(x)] = bytechr(x)
    for x in _reserved():
        cmap[bytechr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[bytechr(x)] = bytechr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212
212
213
213
# prefer the implementation from `parsers` when it provides one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
219
219
220
220
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # note: `path` is a list of segments; it is modified in place and also
    # returned
    for i, n in enumerate(path):
        if not n:
            continue
        # n[0] is an int on py3; membership in b'. ' tests the byte value
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # only the part before the first '.' can be a reserved name
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
267
267
268
268
# length limits used by the hashed encoding (see _hashencode):
# maximum length of an encoded store path
_maxstorepathlen = 120
# how many characters of each directory level are kept
_dirprefixlen = 8
# maximum total length of the shortened directory part
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272
272
273
273
def _hashencode(path, dotencode):
    """hash-based encoding for paths too long for the plain encoding

    Builds `dh/` + up to `_maxshortdirslen` chars of shortened directory
    levels + as much of the basename as fits + the sha1 of the full path +
    the original extension, keeping the result within `_maxstorepathlen`.
    Not reversible.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        # keep only the first _dirprefixlen chars of each directory level
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            # stop adding levels once the shortened dirs would be too long
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with as much of the basename as still fits within the limit
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304
304
305
305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    segments = _encodefname(path).split(b'/')
    encoded = b'/'.join(_auxencode(segments, dotencode))
    if len(encoded) > _maxstorepathlen:
        # the plainly-encoded path is too long: fall back to hashing
        encoded = _hashencode(path, dotencode)
    return encoded
343
343
344
344
def _pathencode(path):
    """dotencode flavour of _hybridencode with an early length shortcut

    A path already longer than _maxstorepathlen goes straight to the hashed
    encoding without attempting the plain encoding first.
    """
    dir_encoded = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    segments = _encodefname(dir_encoded).split(b'/')
    candidate = b'/'.join(_auxencode(segments, True))
    if len(candidate) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return candidate
354
354
355
355
# prefer the implementation from `parsers` when it provides one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357
357
358
358
def _plainhybridencode(f):
    """`_hybridencode` without dot-encoding of leading '. ' characters"""
    return _hybridencode(f, dotencode=False)
361
361
362
362
def _calcmode(vfs):
    """Return the mode to create files with under ``vfs``, or None.

    None means the current umask already yields the directory's mode, so
    no explicit chmod is needed.
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
373
373
374
374
# top-level files and directories that make up a plain (non-fncache) store
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]
388
388
# extension of the main (entry point) file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the other files that may belong to a revlog
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extensions that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" files are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408
415
409
416
def is_revlog(f, kind, st):
    """Return the revlog type of filename `f`, or None.

    Directory entries that are not regular files never qualify.
    """
    return revlog_type(f) if kind == stat.S_IFREG else None
414
421
415
422
def revlog_type(f):
    """Classify filename `f` by extension, returning a FILEFLAGS/FILETYPE
    combination, or None when `f` does not look like a revlog file."""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if f.endswith(REVLOG_FILES_OTHER_EXT):
        flags = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            flags |= FILEFLAGS_VOLATILE
        return flags
    return None
428
435
429
436
# The FILEFLAGS_* bits below are combined into the FILETYPE_* values,
# describing both what a store file contains and its role within a revlog.

# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
454
461
455
462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # path relative to the store root, before filename encoding
    unencoded_path = attr.ib()
    # whether the content might change between listing and streaming
    _is_volatile = attr.ib(default=False)
    # size in bytes when known up front, otherwise None
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of `StoreFile` making up this entry"""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
484
491
485
492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # entries of this class never carry revlog data
    is_revlog = False
491
498
492
499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # a FILEFLAGS_* value describing which revlog this file belongs to
    revlog_type = attr.ib(default=None)
    # whether this is the revlog's main entry point (see FILEFLAGS_REVLOG_MAIN)
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
516
523
517
524
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # size in bytes when known up front; None means "stat on demand".
    #
    # The default used to be False, which `file_size` would return as-is
    # (False is not None), silently reporting a bogus size instead of
    # falling back to vfs.stat.  None restores the intended fallback.
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return this file's size in bytes, statting through `vfs` when no
        size was recorded.  A missing file is reported as size 0."""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
533
540
534
541
def _gather_revlog(files_data):
    """group files per revlog prefix

    Returns a sorted list of `(prefix, suffixes)` pairs.  The prefix is the
    revlog filename without its variable extension; `suffixes` maps each
    extension seen for that revlog to the arbitrary data supplied with the
    file.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, data in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = data
    return sorted(grouped.items())
554
555
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions use a `-SOMELONGIDHASH.ext` form, so the split
    # point is the last `-` instead of the last `.`
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(sep)
    return filename[:cut], filename[cut:]
564
565
535 class basicstore:
566 class basicstore:
536 '''base class for local repository stores'''
567 '''base class for local repository stores'''
537
568
538 def __init__(self, path, vfstype):
569 def __init__(self, path, vfstype):
539 vfs = vfstype(path)
570 vfs = vfstype(path)
540 self.path = vfs.base
571 self.path = vfs.base
541 self.createmode = _calcmode(vfs)
572 self.createmode = _calcmode(vfs)
542 vfs.createmode = self.createmode
573 vfs.createmode = self.createmode
543 self.rawvfs = vfs
574 self.rawvfs = vfs
544 self.vfs = vfsmod.filtervfs(vfs, encodedir)
575 self.vfs = vfsmod.filtervfs(vfs, encodedir)
545 self.opener = self.vfs
576 self.opener = self.vfs
546
577
547 def join(self, f):
578 def join(self, f):
548 return self.path + b'/' + encodedir(f)
579 return self.path + b'/' + encodedir(f)
549
580
    def _walk(self, relpath, recurse):
        '''Walk `relpath` and return a sorted list of
        `(unencoded_name, (revlog_type, size))` pairs for revlog files.

        Subdirectories are only descended into when `recurse` is True.
        '''
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the prefix to strip to make names store-relative
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        # store-relative, forward-slashed, decoded name
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l
573
604
574 def changelog(self, trypending, concurrencychecker=None):
605 def changelog(self, trypending, concurrencychecker=None):
575 return changelog.changelog(
606 return changelog.changelog(
576 self.vfs,
607 self.vfs,
577 trypending=trypending,
608 trypending=trypending,
578 concurrencychecker=concurrencychecker,
609 concurrencychecker=concurrencychecker,
579 )
610 )
580
611
581 def manifestlog(self, repo, storenarrowmatch):
612 def manifestlog(self, repo, storenarrowmatch):
582 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
613 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
583 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
614 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
584
615
585 def datafiles(
616 def datafiles(
586 self, matcher=None, undecodable=None
617 self, matcher=None, undecodable=None
587 ) -> Generator[BaseStoreEntry, None, None]:
618 ) -> Generator[BaseStoreEntry, None, None]:
588 """Like walk, but excluding the changelog and root manifest.
619 """Like walk, but excluding the changelog and root manifest.
589
620
590 When [undecodable] is None, revlogs names that can't be
621 When [undecodable] is None, revlogs names that can't be
591 decoded cause an exception. When it is provided, it should
622 decoded cause an exception. When it is provided, it should
592 be a list and the filenames that can't be decoded are added
623 be a list and the filenames that can't be decoded are added
593 to it instead. This is very rarely needed."""
624 to it instead. This is very rarely needed."""
594 files = self._walk(b'data', True) + self._walk(b'meta', True)
625 files = self._walk(b'data', True) + self._walk(b'meta', True)
595 for u, (t, s) in files:
626 files = (f for f in files if f[1][0] is not None)
596 if t is not None:
627 for revlog, details in _gather_revlog(files):
628 for ext, (t, s) in sorted(details.items()):
629 u = revlog + ext
597 yield RevlogStoreEntry(
630 yield RevlogStoreEntry(
598 unencoded_path=u,
631 unencoded_path=u,
599 revlog_type=FILEFLAGS_FILELOG,
632 revlog_type=FILEFLAGS_FILELOG,
600 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
633 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
601 is_volatile=bool(t & FILEFLAGS_VOLATILE),
634 is_volatile=bool(t & FILEFLAGS_VOLATILE),
602 file_size=s,
635 file_size=s,
603 )
636 )
604
637
605 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
638 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
606 # yield manifest before changelog
639 # yield manifest before changelog
607 files = self._walk(b'', False)
640 files = self._walk(b'', False)
608 # key is (type, path) (keeping ordering so we get 00changelog.i last)
641 # key is (type, path) (keeping ordering so we get 00changelog.i last)
609 type_key = lambda x: (x[1][0], x[0])
642 type_key = lambda x: (x[1][0], x[0])
610 files = sorted(files, reverse=True, key=type_key)
643 files = sorted(files, reverse=True, key=type_key)
611 for u, (t, s) in files:
644 for u, (t, s) in files:
612 if u.startswith(b'00changelog'):
645 if u.startswith(b'00changelog'):
613 yield RevlogStoreEntry(
646 yield RevlogStoreEntry(
614 unencoded_path=u,
647 unencoded_path=u,
615 revlog_type=FILEFLAGS_CHANGELOG,
648 revlog_type=FILEFLAGS_CHANGELOG,
616 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
649 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
617 is_volatile=bool(t & FILEFLAGS_VOLATILE),
650 is_volatile=bool(t & FILEFLAGS_VOLATILE),
618 file_size=s,
651 file_size=s,
619 )
652 )
620 elif u.startswith(b'00manifest'):
653 elif u.startswith(b'00manifest'):
621 yield RevlogStoreEntry(
654 yield RevlogStoreEntry(
622 unencoded_path=u,
655 unencoded_path=u,
623 revlog_type=FILEFLAGS_MANIFESTLOG,
656 revlog_type=FILEFLAGS_MANIFESTLOG,
624 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
657 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
625 is_volatile=bool(t & FILEFLAGS_VOLATILE),
658 is_volatile=bool(t & FILEFLAGS_VOLATILE),
626 file_size=s,
659 file_size=s,
627 )
660 )
628 else:
661 else:
629 yield SimpleStoreEntry(
662 yield SimpleStoreEntry(
630 unencoded_path=u,
663 unencoded_path=u,
631 is_volatile=bool(t & FILEFLAGS_VOLATILE),
664 is_volatile=bool(t & FILEFLAGS_VOLATILE),
632 file_size=s,
665 file_size=s,
633 )
666 )
634
667
635 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
668 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
636 """return files related to data storage (ie: revlogs)
669 """return files related to data storage (ie: revlogs)
637
670
638 yields (file_type, unencoded, size)
671 yields (file_type, unencoded, size)
639
672
640 if a matcher is passed, storage files of only those tracked paths
673 if a matcher is passed, storage files of only those tracked paths
641 are passed with matches the matcher
674 are passed with matches the matcher
642 """
675 """
643 # yield data files first
676 # yield data files first
644 for x in self.datafiles(matcher):
677 for x in self.datafiles(matcher):
645 yield x
678 yield x
646 for x in self.topfiles():
679 for x in self.topfiles():
647 yield x
680 yield x
648
681
649 def copylist(self):
682 def copylist(self):
650 return _data
683 return _data
651
684
652 def write(self, tr):
685 def write(self, tr):
653 pass
686 pass
654
687
655 def invalidatecaches(self):
688 def invalidatecaches(self):
656 pass
689 pass
657
690
658 def markremoved(self, fn):
691 def markremoved(self, fn):
659 pass
692 pass
660
693
661 def __contains__(self, path):
694 def __contains__(self, path):
662 '''Checks if the store contains path'''
695 '''Checks if the store contains path'''
663 path = b"/".join((b"data", path))
696 path = b"/".join((b"data", path))
664 # file?
697 # file?
665 if self.vfs.exists(path + b".i"):
698 if self.vfs.exists(path + b".i"):
666 return True
699 return True
667 # dir?
700 # dir?
668 if not path.endswith(b"/"):
701 if not path.endswith(b"/"):
669 path = path + b"/"
702 path = path + b"/"
670 return self.vfs.exists(path)
703 return self.vfs.exists(path)
671
704
672
705
class encodedstore(basicstore):
    """store variant that hex-encodes filenames on disk (requires 'store')"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            # expose the decoded name to callers
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
712
745
713
746
class fncache:
    """in-memory view of the 'fncache' file listing store filenames"""

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        # None until lazily loaded by _load()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks to avoid loading a huge fncache at once
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access so entries/addls stay consistent
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
838
871
839
872
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records new store files into the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
875
908
876
909
class fncachestore(basicstore):
    """store variant using hybrid path encoding plus an fncache index"""

    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        for f in sorted(self.fncache):
            if not _matchtrackedpath(f, matcher):
                continue
            t = revlog_type(f)
            if t is None:
                # Note: this should not be in the fncache then…
                #
                # However the fncache might contains such file added by
                # previous version of Mercurial.
                continue
            yield RevlogStoreEntry(
                unencoded_path=f,
                revlog_type=FILEFLAGS_FILELOG,
                is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                is_volatile=bool(t & FILEFLAGS_VOLATILE),
            )

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now