##// END OF EJS Templates
store: do the revlog matching on entry directly...
marmoute -
r51387:b4953fad default
parent child Browse files
Show More
@@ -1,1058 +1,1056 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
# optional C implementations of some helpers below; the pure-Python
# versions are used as fallback (see the getattr() bindings further down)
parsers = policy.importmod('parsers')
# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
def _match_tracked_entry(entry, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    if entry.revlog_type == FILEFLAGS_FILELOG:
        # filelog: target_id is the tracked file path, match it directly
        return matcher(entry.target_id)
    elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
        # manifest: target_id is a directory (possibly with a trailing '/'),
        # ask the matcher whether that directory is visited
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    raise error.ProgrammingError(b"cannot process entry %r" % entry)
53 # This avoids a collision between a file named foo and a dir named
51 # This avoids a collision between a file named foo and a dir named
54 # foo.i or foo.d
52 # foo.i or foo.d
55 def _encodedir(path):
53 def _encodedir(path):
56 """
54 """
57 >>> _encodedir(b'data/foo.i')
55 >>> _encodedir(b'data/foo.i')
58 'data/foo.i'
56 'data/foo.i'
59 >>> _encodedir(b'data/foo.i/bla.i')
57 >>> _encodedir(b'data/foo.i/bla.i')
60 'data/foo.i.hg/bla.i'
58 'data/foo.i.hg/bla.i'
61 >>> _encodedir(b'data/foo.i.hg/bla.i')
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 'data/foo.i.hg.hg/bla.i'
60 'data/foo.i.hg.hg/bla.i'
63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 """
63 """
66 return (
64 return (
67 path.replace(b".hg/", b".hg.hg/")
65 path.replace(b".hg/", b".hg.hg/")
68 .replace(b".i/", b".i.hg/")
66 .replace(b".i/", b".i.hg/")
69 .replace(b".d/", b".d.hg/")
67 .replace(b".d/", b".d.hg/")
70 )
68 )
71
69
72
70
# prefer the C implementation of encodedir when the parsers module has one
encodedir = getattr(parsers, 'encodedir', _encodedir)
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was escaped by _encodedir
    if b".hg/" not in path:
        return path
    # undo the '.hg' tagging performed by _encodedir (applied sequentially,
    # same order as the original chained replaces)
    for safe, risky in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(safe, risky)
    return path
94 def _reserved():
92 def _reserved():
95 """characters that are problematic for filesystems
93 """characters that are problematic for filesystems
96
94
97 * ascii escapes (0..31)
95 * ascii escapes (0..31)
98 * ascii hi (126..255)
96 * ascii hi (126..255)
99 * windows specials
97 * windows specials
100
98
101 these characters will be escaped by encodefunctions
99 these characters will be escaped by encodefunctions
102 """
100 """
103 winreserved = [ord(x) for x in u'\\:*?"<>|']
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 for x in range(32):
102 for x in range(32):
105 yield x
103 yield x
106 for x in range(126, 256):
104 for x in range(126, 256):
107 yield x
105 yield x
108 for x in winreserved:
106 for x in winreserved:
109 yield x
107 yield x
110
108
111
109
def _buildencodefun():
    """Build the (encode, decode) pair of functions for store filenames.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'  # escape character for uppercase letters (and itself)
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encode map: identity for plain ascii, '~xx' hex escapes for reserved
    # bytes, '_x' lowercasing escapes for capitals and '_' itself
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decode map: exact inverse of cmap
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # encoded sequences are 1 ('x'), 2 ('_x') or 3 ('~xx') bytes long;
        # greedily try the shortest match at each position
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no 1-3 byte sequence matched: input is not valid output
                # of the encoder
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
# module-level encoder/decoder pair, built once at import time
_encodefname, _decodefname = _buildencodefun()
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # directory-collision encoding first, then per-character encoding
    return _encodefname(encodedir(s))
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # exact inverse of encodefilename: per-character decoding first, then
    # directory-collision decoding
    return decodedir(_decodefname(s))
def _buildlowerencodefun():
    """Build the lowercasing encoder used by the hashed-path fallback.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity for plain ascii, '~xx' escapes for reserved bytes, and
    # plain lowercasing (no '_' escape — this encoding is not reversible)
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
# prefer the C implementation of lowerencode when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # NOTE: mutates (and returns) the *path* list in place
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # hex-escape a leading period or space
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            # reserved names only matter as the part before the first dot
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
# limits used by the hashed-encoding fallback (see _hashencode):
# maximum length of an encoded store path
_maxstorepathlen = 120
# how many characters of each directory component are kept
_dirprefixlen = 8
# maximum total length of the shortened directory part
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hashencode(path, dotencode):
    """non-reversible hashed encoding for over-long paths (see _hybridencode)

    Produces 'dh/' + shortened directory prefixes + as much of the basename
    as fits + sha1 digest of the full path + original extension.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    # keep up to _dirprefixlen chars of each directory level, stopping once
    # the joined result would exceed _maxshortdirslen
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)  # +1 for the joining '/'
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # fill remaining room (up to _maxstorepathlen) with the basename prefix
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long after the reversible encoding: fall back to hashing
        res = _hashencode(path, dotencode)
    return res
def _pathencode(path):
    """pure-Python path encoder (same scheme as _hybridencode with
    dotencode=True); checks the raw length first to skip the reversible
    encoding pass when the path is already known to be too long"""
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
# prefer the C implementation of pathencode when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
def _plainhybridencode(f):
    # hybrid encoding without the leading '.'/' ' escaping (dotencode=False)
    return _hybridencode(f, False)
def _calcmode(vfs):
    """return the creation mode for new store files, or None

    None means the default mode is fine (either stat failed, or the mode
    already matches what the umask would produce anyway)."""
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods: if the permission bits already equal
    # what the umask yields, no explicit mode is needed
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
# names of files and directories found at the root of the store
# (NOTE(review): presumably the set enumerated by the basic store walk —
# confirm against callers)
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]
# extension of the "main" (index) file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the secondary revlog files
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
def is_revlog(f, kind, st):
    """return the revlog type flag for file *f*, or None

    Only regular files can be revlogs; *kind* is a stat file-type value."""
    if kind == stat.S_IFREG:
        return revlog_type(f)
    return None
def revlog_type(f):
    """classify filename *f* as a revlog main/other file, or None"""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if not f.endswith(REVLOG_FILES_OTHER_EXT):
        return None
    flags = FILETYPE_FILELOG_OTHER
    if f.endswith(REVLOG_FILES_VOLATILE_EXT):
        flags |= FILEFLAGS_VOLATILE
    return flags
# A FILETYPE_* value combines one content flag (changelog / manifest /
# filelog / other) with one role flag (revlog main / revlog other), and may
# additionally carry FILEFLAGS_VOLATILE.

# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # path of this entry relative to the store root, before filename encoding
    unencoded_path = attr.ib()
    # True when the backing file might change or disappear between listing
    # and streaming (see REVLOG_FILES_VOLATILE_EXT)
    _is_volatile = attr.ib(default=False)
    # known size in bytes, or None when it must be stat'ed on demand
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of StoreFile backing this entry"""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # class-level marker: this entry does not represent a revlog
    is_revlog = False
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* content flags (changelog/manifestlog/filelog)
    revlog_type = attr.ib(default=None)
    # what this revlog tracks: the file path for filelogs, the directory for
    # manifests (see _match_tracked_entry)
    target_id = attr.ib(default=None)
    # True when this entry is the main (index) file of the revlog
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self.unencoded_path
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # known size in bytes, or None to stat the file on demand
    # NOTE(review): default is False but file_size() tests `is not None`,
    # so a default-constructed instance would return False — confirm that
    # callers always pass file_size explicitly
    _file_size = attr.ib(default=False)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return the file size, stat-ing through *vfs* when unknown

        a missing file is reported as size 0"""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog prefix
    without extension, the second level is all the file "suffix" that were
    seen for this revlog and arbitrary file data as value.
    """
    revlogs = collections.defaultdict(dict)
    for u, value in files_data:
        name, ext = _split_revlog_ext(u)
        revlogs[name][ext] = value
    # sorted by revlog prefix for deterministic iteration order
    return sorted(revlogs.items())
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash forms are named `PREFIX-SOMELONGIDHASH.ext`, other revlog
    # files are plain `PREFIX.ext` (see REVLOG_FILES_LONG_EXT)
    if filename.endswith(REVLOG_FILES_LONG_EXT):
        char = b'-'
    else:
        char = b'.'
    idx = filename.rfind(char)
    return filename[:idx], filename[idx:]
573 def _ext_key(ext):
571 def _ext_key(ext):
574 """a key to order revlog suffix
572 """a key to order revlog suffix
575
573
576 important to issue .i after other entry."""
574 important to issue .i after other entry."""
577 # the only important part of this order is to keep the `.i` last.
575 # the only important part of this order is to keep the `.i` last.
578 if ext.endswith(b'.n'):
576 if ext.endswith(b'.n'):
579 return (0, ext)
577 return (0, ext)
580 elif ext.endswith(b'.nd'):
578 elif ext.endswith(b'.nd'):
581 return (10, ext)
579 return (10, ext)
582 elif ext.endswith(b'.d'):
580 elif ext.endswith(b'.d'):
583 return (20, ext)
581 return (20, ext)
584 elif ext.endswith(b'.i'):
582 elif ext.endswith(b'.i'):
585 return (50, ext)
583 return (50, ext)
586 else:
584 else:
587 return (40, ext)
585 return (40, ext)
588
586
589
587
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        # the raw (unfiltered) vfs rooted at the store path
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # self.vfs applies directory-name encoding on every access
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """Return the filesystem path for store file ``f`` (dir-encoded)."""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        """Walk the store below ``relpath`` and collect revlog files.

        Returns a sorted list of ``(decoded_path, (revlog_type, size))``
        tuples; non-revlog files are skipped.  ``undecodable`` is unused
        here (subclasses use it).
        """
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the prefix to strip to get store-relative names
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    # None means "not a revlog file" and is filtered out
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """Instantiate the changelog for this store."""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """Instantiate the manifest log for this store."""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        NOTE(review): ``matcher`` is accepted but not applied here; the
        subclasses' overrides do the matching.
        """
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            # drop entries _walk marked as non-revlog
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    # target id is the path below data/ or meta/
                    revlog_target_id = revlog.split(b'/', 1)[1]
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """Yield store entries for top-level files.

        Plain files come out as SimpleStoreEntry; the 00manifest and
        00changelog revlogs are grouped and yielded last (manifest
        before changelog).
        """
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        # there is at most one root manifest and one changelog per store
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                # (keeping ordering so we get 00changelog.i last)
                key = lambda x: _ext_key(x[0])
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        """List of store-relative paths to copy when cloning by hardlink."""
        return _data

    def write(self, tr):
        # no persistent state to flush for the basic store
        pass

    def invalidatecaches(self):
        # no caches to drop for the basic store
        pass

    def markremoved(self, fn):
        # the basic store tracks no filename cache to update
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
746
744
747
745
class encodedstore(basicstore):
    """Store variant where every filename is encoded on disk."""

    def __init__(self, path, vfstype):
        # the store lives under a dedicated '/store' sub-directory
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # self.vfs encodes full filenames (not just directories)
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """Walk like basicstore, decoding on-disk names back to logical ones.

        Names that fail to decode either raise StorageError or, when
        ``undecodable`` is a list, are appended to it and skipped.
        """
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield data-file entries, filtered through ``matcher``."""
        entries = super(encodedstore, self).datafiles(undecodable=undecodable)
        for entry in entries:
            # matching is done on the entry itself (see _match_tracked_entry)
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        """Return the filesystem path for ``f`` with full filename encoding."""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """List of paths to copy when cloning by hardlink."""
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
787
785
788
786
class fncache:
    """In-memory mirror of the on-disk ``fncache`` file.

    The fncache lists every data/meta filename in the store so it can be
    enumerated without walking encoded directories.  Entries are loaded
    lazily; new names are accumulated in ``addls`` and appended on write.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names to silently skip in add()
        self._ignores = set()
        # None until _load() runs; then the set of known entries
        self.entries = None
        # True when an entry was removed and a full rewrite is needed
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks to avoid loading a huge fncache at once
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                # only decode up to the last complete line in the chunk
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks a trailing newline
            # NOTE(review): message has a grammar slip ("does not ends");
            # kept byte-identical for output compatibility.
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Persist pending changes within transaction ``tr``.

        A removal (_dirty) forces a full rewrite; additions alone are
        simply appended to the existing file.
        """
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # drop the in-memory copy; it will be re-read on next use
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Register ``fn`` so later add() calls for it are no-ops."""
        self._ignores.add(fn)

    def add(self, fn):
        """Record ``fn`` as a pending addition (unless ignored/known)."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget ``fn``; marks the cache dirty if it was persisted."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never persisted, just drop the pending addition
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        # check pending additions first to avoid a load when possible
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        # iterate over both persisted entries and pending additions
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
913
911
914
912
class _fncachevfs(vfsmod.proxyvfs):
    """Proxy vfs that transparently registers new store files in fncache.

    Paths are encoded with ``encode`` before hitting the underlying vfs.
    """

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        """Open ``path``; record it in fncache when opened for write."""
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """Return the encoded on-disk path for ``path``."""
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        # only data/meta files belong in the fncache
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
950
948
951
949
class fncachestore(basicstore):
    """Store variant backed by an fncache filename index.

    File enumeration comes from the fncache file rather than a directory
    walk; on-disk names use hybrid (optionally dot-) encoding.
    """

    def __init__(self, path, vfstype, dotencode):
        # choose the filename-encoding scheme based on the repo requirement
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # all writes go through the fncache-registering proxy vfs
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the encoded filesystem path for store file ``f``."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """Return the on-disk size of the (already encoded) ``path``."""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield revlog entries listed in the fncache, filtered by matcher.

        NOTE(review): no ``file_size`` is passed here — presumably the
        entry stats the file lazily; confirm against RevlogStoreEntry.
        """
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                # build the entry first so matching can be done on it
                entry = RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )
                if _match_tracked_entry(entry, matcher):
                    yield entry

    def copylist(self):
        """List of paths to copy when cloning by hardlink."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        """Flush pending fncache changes within transaction ``tr``."""
        self.fncache.write(tr)

    def invalidatecaches(self):
        # force the fncache to be re-read from disk on next access
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """Drop ``fn`` from the fncache after its removal from the store."""
        self.fncache.remove(fn)

    def _exists(self, f):
        # existence check via stat on the encoded path
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now