store: refactor space delimited list to proper data structure...
Pulkit Goyal
r45911:909dafff default
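In brief, this change turns the module's space-delimited file lists into real Python lists so callers no longer have to split them. A condensed before/after, taken from the diff below:

# before: a single space-delimited bytes literal, split at every use
_data = (
    b'bookmarks narrowspec data meta 00manifest.d 00manifest.i'
    b' 00changelog.d 00changelog.i phaseroots obsstore'
)
copyfiles = [b'requires'] + _data.split()

# after: a proper list, consumed directly
_data = [
    b'bookmarks', b'narrowspec', b'data', b'meta',
    b'00manifest.d', b'00manifest.i', b'00changelog.d', b'00changelog.i',
    b'phaseroots', b'obsstore',
]
copyfiles = [b'requires'] + _data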
@@ -1,730 +1,744 @@
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import errno
10 import errno
11 import functools
11 import functools
12 import os
12 import os
13 import stat
13 import stat
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from . import (
17 from . import (
18 changelog,
18 changelog,
19 error,
19 error,
20 manifest,
20 manifest,
21 node,
21 node,
22 policy,
22 policy,
23 pycompat,
23 pycompat,
24 util,
24 util,
25 vfs as vfsmod,
25 vfs as vfsmod,
26 )
26 )
27 from .utils import hashutil
27 from .utils import hashutil
28
28
29 parsers = policy.importmod('parsers')
29 parsers = policy.importmod('parsers')
30 # how many bytes should be read from fncache in one read
30 # how many bytes should be read from fncache in one read
31 # It is done to prevent loading large fncache files into memory
31 # It is done to prevent loading large fncache files into memory
32 fncache_chunksize = 10 ** 6
32 fncache_chunksize = 10 ** 6
33
33
34
34
35 def _matchtrackedpath(path, matcher):
35 def _matchtrackedpath(path, matcher):
36 """parses a fncache entry and returns whether the entry is tracking a path
36 """parses a fncache entry and returns whether the entry is tracking a path
37 matched by matcher or not.
37 matched by matcher or not.
38
38
39 If matcher is None, returns True"""
39 If matcher is None, returns True"""
40
40
41 if matcher is None:
41 if matcher is None:
42 return True
42 return True
43 path = decodedir(path)
43 path = decodedir(path)
44 if path.startswith(b'data/'):
44 if path.startswith(b'data/'):
45 return matcher(path[len(b'data/') : -len(b'.i')])
45 return matcher(path[len(b'data/') : -len(b'.i')])
46 elif path.startswith(b'meta/'):
46 elif path.startswith(b'meta/'):
47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
47 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
48
48
49 raise error.ProgrammingError(b"cannot decode path %s" % path)
49 raise error.ProgrammingError(b"cannot decode path %s" % path)
50
50
51
51
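# Usage sketch (not part of store.py): how a fncache entry is matched.
# `fakematcher` is a hypothetical stand-in for a mercurial.match matcher;
# anything exposing __call__(path) and visitdir(dir) behaves the same here.
from mercurial import store

class fakematcher(object):
    def __call__(self, path):         # file match, used for b'data/...' entries
        return path.startswith(b'foo')

    def visitdir(self, dirpath):      # directory match, used for b'meta/...' entries
        return dirpath.startswith(b'foo')

m = fakematcher()
store._matchtrackedpath(b'data/foo/bar.i', m)         # True: matcher(b'foo/bar')
store._matchtrackedpath(b'meta/foo/00manifest.i', m)  # True: visitdir(b'foo')
store._matchtrackedpath(b'data/baz.i', None)          # True: no matcher given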
52 # This avoids a collision between a file named foo and a dir named
52 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
53 # foo.i or foo.d
54 def _encodedir(path):
54 def _encodedir(path):
55 '''
55 '''
56 >>> _encodedir(b'data/foo.i')
56 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
57 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
58 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
59 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
61 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 '''
64 '''
65 return (
65 return (
66 path.replace(b".hg/", b".hg.hg/")
66 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
67 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
68 .replace(b".d/", b".d.hg/")
69 )
69 )
70
70
71
71
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73
73
74
74
75 def decodedir(path):
75 def decodedir(path):
76 '''
76 '''
77 >>> decodedir(b'data/foo.i')
77 >>> decodedir(b'data/foo.i')
78 'data/foo.i'
78 'data/foo.i'
79 >>> decodedir(b'data/foo.i.hg/bla.i')
79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 'data/foo.i/bla.i'
80 'data/foo.i/bla.i'
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 'data/foo.i.hg/bla.i'
82 'data/foo.i.hg/bla.i'
83 '''
83 '''
84 if b".hg/" not in path:
84 if b".hg/" not in path:
85 return path
85 return path
86 return (
86 return (
87 path.replace(b".d.hg/", b".d/")
87 path.replace(b".d.hg/", b".d/")
88 .replace(b".i.hg/", b".i/")
88 .replace(b".i.hg/", b".i/")
89 .replace(b".hg.hg/", b".hg/")
89 .replace(b".hg.hg/", b".hg/")
90 )
90 )
91
91
92
92
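# Round-trip sketch (not part of store.py), assuming mercurial is importable:
# decodedir() undoes encodedir(), which only rewrites the .hg/ .i/ .d/ suffixes.
from mercurial import store

p = b'data/foo.i/bla.i'
enc = store.encodedir(p)           # b'data/foo.i.hg/bla.i' (see doctest above)
assert store.decodedir(enc) == p   # reversible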
93 def _reserved():
93 def _reserved():
94 ''' characters that are problematic for filesystems
94 ''' characters that are problematic for filesystems
95
95
96 * ascii escapes (0..31)
96 * ascii escapes (0..31)
97 * ascii hi (126..255)
97 * ascii hi (126..255)
98 * windows specials
98 * windows specials
99
99
100 these characters will be escaped by the encode functions
100 these characters will be escaped by the encode functions
101 '''
101 '''
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
103 for x in range(32):
104 yield x
104 yield x
105 for x in range(126, 256):
105 for x in range(126, 256):
106 yield x
106 yield x
107 for x in winreserved:
107 for x in winreserved:
108 yield x
108 yield x
109
109
110
110
111 def _buildencodefun():
111 def _buildencodefun():
112 '''
112 '''
113 >>> enc, dec = _buildencodefun()
113 >>> enc, dec = _buildencodefun()
114
114
115 >>> enc(b'nothing/special.txt')
115 >>> enc(b'nothing/special.txt')
116 'nothing/special.txt'
116 'nothing/special.txt'
117 >>> dec(b'nothing/special.txt')
117 >>> dec(b'nothing/special.txt')
118 'nothing/special.txt'
118 'nothing/special.txt'
119
119
120 >>> enc(b'HELLO')
120 >>> enc(b'HELLO')
121 '_h_e_l_l_o'
121 '_h_e_l_l_o'
122 >>> dec(b'_h_e_l_l_o')
122 >>> dec(b'_h_e_l_l_o')
123 'HELLO'
123 'HELLO'
124
124
125 >>> enc(b'hello:world?')
125 >>> enc(b'hello:world?')
126 'hello~3aworld~3f'
126 'hello~3aworld~3f'
127 >>> dec(b'hello~3aworld~3f')
127 >>> dec(b'hello~3aworld~3f')
128 'hello:world?'
128 'hello:world?'
129
129
130 >>> enc(b'the\\x07quick\\xADshot')
130 >>> enc(b'the\\x07quick\\xADshot')
131 'the~07quick~adshot'
131 'the~07quick~adshot'
132 >>> dec(b'the~07quick~adshot')
132 >>> dec(b'the~07quick~adshot')
133 'the\\x07quick\\xadshot'
133 'the\\x07quick\\xadshot'
134 '''
134 '''
135 e = b'_'
135 e = b'_'
136 xchr = pycompat.bytechr
136 xchr = pycompat.bytechr
137 asciistr = list(map(xchr, range(127)))
137 asciistr = list(map(xchr, range(127)))
138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139
139
140 cmap = {x: x for x in asciistr}
140 cmap = {x: x for x in asciistr}
141 for x in _reserved():
141 for x in _reserved():
142 cmap[xchr(x)] = b"~%02x" % x
142 cmap[xchr(x)] = b"~%02x" % x
143 for x in capitals + [ord(e)]:
143 for x in capitals + [ord(e)]:
144 cmap[xchr(x)] = e + xchr(x).lower()
144 cmap[xchr(x)] = e + xchr(x).lower()
145
145
146 dmap = {}
146 dmap = {}
147 for k, v in pycompat.iteritems(cmap):
147 for k, v in pycompat.iteritems(cmap):
148 dmap[v] = k
148 dmap[v] = k
149
149
150 def decode(s):
150 def decode(s):
151 i = 0
151 i = 0
152 while i < len(s):
152 while i < len(s):
153 for l in pycompat.xrange(1, 4):
153 for l in pycompat.xrange(1, 4):
154 try:
154 try:
155 yield dmap[s[i : i + l]]
155 yield dmap[s[i : i + l]]
156 i += l
156 i += l
157 break
157 break
158 except KeyError:
158 except KeyError:
159 pass
159 pass
160 else:
160 else:
161 raise KeyError
161 raise KeyError
162
162
163 return (
163 return (
164 lambda s: b''.join(
164 lambda s: b''.join(
165 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
165 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
166 ),
166 ),
167 lambda s: b''.join(list(decode(s))),
167 lambda s: b''.join(list(decode(s))),
168 )
168 )
169
169
170
170
171 _encodefname, _decodefname = _buildencodefun()
171 _encodefname, _decodefname = _buildencodefun()
172
172
173
173
174 def encodefilename(s):
174 def encodefilename(s):
175 '''
175 '''
176 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
177 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
178 '''
178 '''
179 return _encodefname(encodedir(s))
179 return _encodefname(encodedir(s))
180
180
181
181
182 def decodefilename(s):
182 def decodefilename(s):
183 '''
183 '''
184 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
185 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
186 '''
186 '''
187 return decodedir(_decodefname(s))
187 return decodedir(_decodefname(s))
188
188
189
189
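# Round-trip sketch (not part of store.py): encodefilename() composes the
# directory encoding with the '_'/'~xx' filename encoding and is reversible.
from mercurial import store

name = b'foo.i/bar.d/bla.hg/hi:world?/HELLO'
enc = store.encodefilename(name)
# enc == b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o' (per the doctest)
assert store.decodefilename(enc) == name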
190 def _buildlowerencodefun():
190 def _buildlowerencodefun():
191 '''
191 '''
192 >>> f = _buildlowerencodefun()
192 >>> f = _buildlowerencodefun()
193 >>> f(b'nothing/special.txt')
193 >>> f(b'nothing/special.txt')
194 'nothing/special.txt'
194 'nothing/special.txt'
195 >>> f(b'HELLO')
195 >>> f(b'HELLO')
196 'hello'
196 'hello'
197 >>> f(b'hello:world?')
197 >>> f(b'hello:world?')
198 'hello~3aworld~3f'
198 'hello~3aworld~3f'
199 >>> f(b'the\\x07quick\\xADshot')
199 >>> f(b'the\\x07quick\\xADshot')
200 'the~07quick~adshot'
200 'the~07quick~adshot'
201 '''
201 '''
202 xchr = pycompat.bytechr
202 xchr = pycompat.bytechr
203 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
203 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
204 for x in _reserved():
204 for x in _reserved():
205 cmap[xchr(x)] = b"~%02x" % x
205 cmap[xchr(x)] = b"~%02x" % x
206 for x in range(ord(b"A"), ord(b"Z") + 1):
206 for x in range(ord(b"A"), ord(b"Z") + 1):
207 cmap[xchr(x)] = xchr(x).lower()
207 cmap[xchr(x)] = xchr(x).lower()
208
208
209 def lowerencode(s):
209 def lowerencode(s):
210 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
211
211
212 return lowerencode
212 return lowerencode
213
213
214
214
215 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
216
216
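# Sketch (not part of store.py): lowerencode maps uppercase letters to
# lowercase instead of '_x' escapes, so unlike encodefilename it is not
# reversible: b'HELLO' and b'hello' both encode to b'hello'.
from mercurial import store

assert store.lowerencode(b'HELLO') == b'hello'
assert store.lowerencode(b'hello') == b'hello'
assert store.lowerencode(b'hello:world?') == b'hello~3aworld~3f'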
217 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
218 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
219 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
220
220
221
221
222 def _auxencode(path, dotencode):
222 def _auxencode(path, dotencode):
223 '''
223 '''
224 Encodes filenames containing names reserved by Windows or which end in
224 Encodes filenames containing names reserved by Windows or which end in
225 period or space. Does not touch other single reserved characters c.
225 period or space. Does not touch other single reserved characters c.
226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
227 Additionally encodes space or period at the beginning, if dotencode is
227 Additionally encodes space or period at the beginning, if dotencode is
228 True. Parameter path is assumed to be all lowercase.
228 True. Parameter path is assumed to be all lowercase.
229 A segment only needs encoding if a reserved name appears as a
229 A segment only needs encoding if a reserved name appears as a
230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
231 doesn't need encoding.
231 doesn't need encoding.
232
232
233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
234 >>> _auxencode(s.split(b'/'), True)
234 >>> _auxencode(s.split(b'/'), True)
235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
237 >>> _auxencode(s.split(b'/'), False)
237 >>> _auxencode(s.split(b'/'), False)
238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
239 >>> _auxencode([b'foo. '], True)
239 >>> _auxencode([b'foo. '], True)
240 ['foo.~20']
240 ['foo.~20']
241 >>> _auxencode([b' .foo'], True)
241 >>> _auxencode([b' .foo'], True)
242 ['~20.foo']
242 ['~20.foo']
243 '''
243 '''
244 for i, n in enumerate(path):
244 for i, n in enumerate(path):
245 if not n:
245 if not n:
246 continue
246 continue
247 if dotencode and n[0] in b'. ':
247 if dotencode and n[0] in b'. ':
248 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 n = b"~%02x" % ord(n[0:1]) + n[1:]
249 path[i] = n
249 path[i] = n
250 else:
250 else:
251 l = n.find(b'.')
251 l = n.find(b'.')
252 if l == -1:
252 if l == -1:
253 l = len(n)
253 l = len(n)
254 if (l == 3 and n[:3] in _winres3) or (
254 if (l == 3 and n[:3] in _winres3) or (
255 l == 4
255 l == 4
256 and n[3:4] <= b'9'
256 and n[3:4] <= b'9'
257 and n[3:4] >= b'1'
257 and n[3:4] >= b'1'
258 and n[:3] in _winres4
258 and n[:3] in _winres4
259 ):
259 ):
260 # encode third letter ('aux' -> 'au~78')
260 # encode third letter ('aux' -> 'au~78')
261 ec = b"~%02x" % ord(n[2:3])
261 ec = b"~%02x" % ord(n[2:3])
262 n = n[0:2] + ec + n[3:]
262 n = n[0:2] + ec + n[3:]
263 path[i] = n
263 path[i] = n
264 if n[-1] in b'. ':
264 if n[-1] in b'. ':
265 # encode last period or space ('foo...' -> 'foo..~2e')
265 # encode last period or space ('foo...' -> 'foo..~2e')
266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
267 return path
267 return path
268
268
269
269
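# Worked example (not part of store.py), reusing the doctest input above:
# only basenames that *are* a reserved name (before the first dot) get their
# third byte escaped; 'txt.aux' is left alone.
from mercurial import store

segments = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'.split(b'/')
assert store._auxencode(segments, True) == [
    b'~2efoo', b'au~78.txt', b'txt.aux', b'co~6e', b'pr~6e', b'nu~6c', b'foo~2e'
]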
270 _maxstorepathlen = 120
270 _maxstorepathlen = 120
271 _dirprefixlen = 8
271 _dirprefixlen = 8
272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
273
273
274
274
275 def _hashencode(path, dotencode):
275 def _hashencode(path, dotencode):
276 digest = node.hex(hashutil.sha1(path).digest())
276 digest = node.hex(hashutil.sha1(path).digest())
277 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
278 parts = _auxencode(le, dotencode)
278 parts = _auxencode(le, dotencode)
279 basename = parts[-1]
279 basename = parts[-1]
280 _root, ext = os.path.splitext(basename)
280 _root, ext = os.path.splitext(basename)
281 sdirs = []
281 sdirs = []
282 sdirslen = 0
282 sdirslen = 0
283 for p in parts[:-1]:
283 for p in parts[:-1]:
284 d = p[:_dirprefixlen]
284 d = p[:_dirprefixlen]
285 if d[-1] in b'. ':
285 if d[-1] in b'. ':
286 # Windows can't access dirs ending in period or space
286 # Windows can't access dirs ending in period or space
287 d = d[:-1] + b'_'
287 d = d[:-1] + b'_'
288 if sdirslen == 0:
288 if sdirslen == 0:
289 t = len(d)
289 t = len(d)
290 else:
290 else:
291 t = sdirslen + 1 + len(d)
291 t = sdirslen + 1 + len(d)
292 if t > _maxshortdirslen:
292 if t > _maxshortdirslen:
293 break
293 break
294 sdirs.append(d)
294 sdirs.append(d)
295 sdirslen = t
295 sdirslen = t
296 dirs = b'/'.join(sdirs)
296 dirs = b'/'.join(sdirs)
297 if len(dirs) > 0:
297 if len(dirs) > 0:
298 dirs += b'/'
298 dirs += b'/'
299 res = b'dh/' + dirs + digest + ext
299 res = b'dh/' + dirs + digest + ext
300 spaceleft = _maxstorepathlen - len(res)
300 spaceleft = _maxstorepathlen - len(res)
301 if spaceleft > 0:
301 if spaceleft > 0:
302 filler = basename[:spaceleft]
302 filler = basename[:spaceleft]
303 res = b'dh/' + dirs + filler + digest + ext
303 res = b'dh/' + dirs + filler + digest + ext
304 return res
304 return res
305
305
306
306
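# Sketch (not part of store.py) of what a hashed name looks like: a long
# b'data/...' path collapses to b'dh/' + shortened dirs + basename filler +
# the sha1 hex digest + the original extension, capped at _maxstorepathlen.
from mercurial import store

longpath = b'data/' + b'x' * 200 + b'.i'
hashed = store._hashencode(longpath, True)
assert hashed.startswith(b'dh/') and hashed.endswith(b'.i')
assert len(hashed) <= store._maxstorepathlen   # 120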
307 def _hybridencode(path, dotencode):
307 def _hybridencode(path, dotencode):
308 '''encodes path with a length limit
308 '''encodes path with a length limit
309
309
310 Encodes all paths that begin with 'data/', according to the following.
310 Encodes all paths that begin with 'data/', according to the following.
311
311
312 Default encoding (reversible):
312 Default encoding (reversible):
313
313
314 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
315 characters are encoded as '~xx', where xx is the two digit hex code
315 characters are encoded as '~xx', where xx is the two digit hex code
316 of the character (see encodefilename).
316 of the character (see encodefilename).
317 Relevant path components consisting of Windows reserved filenames are
317 Relevant path components consisting of Windows reserved filenames are
318 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
319
319
320 Hashed encoding (not reversible):
320 Hashed encoding (not reversible):
321
321
322 If the default-encoded path is longer than _maxstorepathlen, a
322 If the default-encoded path is longer than _maxstorepathlen, a
323 non-reversible hybrid hashing of the path is done instead.
323 non-reversible hybrid hashing of the path is done instead.
324 This encoding uses up to _dirprefixlen characters of all directory
324 This encoding uses up to _dirprefixlen characters of all directory
325 levels of the lowerencoded path, but not more levels than can fit into
325 levels of the lowerencoded path, but not more levels than can fit into
326 _maxshortdirslen.
326 _maxshortdirslen.
327 Then follows the filler followed by the sha digest of the full path.
327 Then follows the filler followed by the sha digest of the full path.
328 The filler is the beginning of the basename of the lowerencoded path
328 The filler is the beginning of the basename of the lowerencoded path
329 (the basename is everything after the last path separator). The filler
329 (the basename is everything after the last path separator). The filler
330 is as long as possible, filling in characters from the basename until
330 is as long as possible, filling in characters from the basename until
331 the encoded path has _maxstorepathlen characters (or all chars of the
331 the encoded path has _maxstorepathlen characters (or all chars of the
332 basename have been taken).
332 basename have been taken).
333 The extension (e.g. '.i' or '.d') is preserved.
333 The extension (e.g. '.i' or '.d') is preserved.
334
334
335 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
336 encoding was used.
336 encoding was used.
337 '''
337 '''
338 path = encodedir(path)
338 path = encodedir(path)
339 ef = _encodefname(path).split(b'/')
339 ef = _encodefname(path).split(b'/')
340 res = b'/'.join(_auxencode(ef, dotencode))
340 res = b'/'.join(_auxencode(ef, dotencode))
341 if len(res) > _maxstorepathlen:
341 if len(res) > _maxstorepathlen:
342 res = _hashencode(path, dotencode)
342 res = _hashencode(path, dotencode)
343 return res
343 return res
344
344
345
345
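# Sketch (not part of store.py): short paths keep the reversible encoding,
# long ones fall back to the hashed b'dh/...' form.
from mercurial import store

short = store._hybridencode(b'data/Some/File.TXT.i', False)
assert short == b'data/_some/_file._t_x_t.i'    # still decodable
longname = store._hybridencode(b'data/' + b'A' * 200 + b'.i', False)
assert longname.startswith(b'dh/')              # hashed, not reversible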
346 def _pathencode(path):
346 def _pathencode(path):
347 de = encodedir(path)
347 de = encodedir(path)
348 if len(path) > _maxstorepathlen:
348 if len(path) > _maxstorepathlen:
349 return _hashencode(de, True)
349 return _hashencode(de, True)
350 ef = _encodefname(de).split(b'/')
350 ef = _encodefname(de).split(b'/')
351 res = b'/'.join(_auxencode(ef, True))
351 res = b'/'.join(_auxencode(ef, True))
352 if len(res) > _maxstorepathlen:
352 if len(res) > _maxstorepathlen:
353 return _hashencode(de, True)
353 return _hashencode(de, True)
354 return res
354 return res
355
355
356
356
357 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 _pathencode = getattr(parsers, 'pathencode', _pathencode)
358
358
359
359
360 def _plainhybridencode(f):
360 def _plainhybridencode(f):
361 return _hybridencode(f, False)
361 return _hybridencode(f, False)
362
362
363
363
364 def _calcmode(vfs):
364 def _calcmode(vfs):
365 try:
365 try:
366 # files in .hg/ will be created using this mode
366 # files in .hg/ will be created using this mode
367 mode = vfs.stat().st_mode
367 mode = vfs.stat().st_mode
368 # avoid some useless chmods
368 # avoid some useless chmods
369 if (0o777 & ~util.umask) == (0o777 & mode):
369 if (0o777 & ~util.umask) == (0o777 & mode):
370 mode = None
370 mode = None
371 except OSError:
371 except OSError:
372 mode = None
372 mode = None
373 return mode
373 return mode
374
374
375
375
376 _data = (
377 b'bookmarks narrowspec data meta 00manifest.d 00manifest.i'
378 b' 00changelog.d 00changelog.i phaseroots obsstore'
379 )
380
376 _data = [
377 b'bookmarks',
378 b'narrowspec',
379 b'data',
380 b'meta',
381 b'00manifest.d',
382 b'00manifest.i',
383 b'00changelog.d',
384 b'00changelog.i',
385 b'phaseroots',
386 b'obsstore',
387 ]
388
381
389
382 def isrevlog(f, kind, st):
390 def isrevlog(f, kind, st):
383 return kind == stat.S_IFREG and f[-2:] in (b'.i', b'.d')
391 return kind == stat.S_IFREG and f[-2:] in (b'.i', b'.d')
384
392
385
393
386 class basicstore(object):
394 class basicstore(object):
387 '''base class for local repository stores'''
395 '''base class for local repository stores'''
388
396
389 def __init__(self, path, vfstype):
397 def __init__(self, path, vfstype):
390 vfs = vfstype(path)
398 vfs = vfstype(path)
391 self.path = vfs.base
399 self.path = vfs.base
392 self.createmode = _calcmode(vfs)
400 self.createmode = _calcmode(vfs)
393 vfs.createmode = self.createmode
401 vfs.createmode = self.createmode
394 self.rawvfs = vfs
402 self.rawvfs = vfs
395 self.vfs = vfsmod.filtervfs(vfs, encodedir)
403 self.vfs = vfsmod.filtervfs(vfs, encodedir)
396 self.opener = self.vfs
404 self.opener = self.vfs
397
405
398 def join(self, f):
406 def join(self, f):
399 return self.path + b'/' + encodedir(f)
407 return self.path + b'/' + encodedir(f)
400
408
401 def _walk(self, relpath, recurse, filefilter=isrevlog):
409 def _walk(self, relpath, recurse, filefilter=isrevlog):
402 '''yields (unencoded, encoded, size)'''
410 '''yields (unencoded, encoded, size)'''
403 path = self.path
411 path = self.path
404 if relpath:
412 if relpath:
405 path += b'/' + relpath
413 path += b'/' + relpath
406 striplen = len(self.path) + 1
414 striplen = len(self.path) + 1
407 l = []
415 l = []
408 if self.rawvfs.isdir(path):
416 if self.rawvfs.isdir(path):
409 visit = [path]
417 visit = [path]
410 readdir = self.rawvfs.readdir
418 readdir = self.rawvfs.readdir
411 while visit:
419 while visit:
412 p = visit.pop()
420 p = visit.pop()
413 for f, kind, st in readdir(p, stat=True):
421 for f, kind, st in readdir(p, stat=True):
414 fp = p + b'/' + f
422 fp = p + b'/' + f
415 if filefilter(f, kind, st):
423 if filefilter(f, kind, st):
416 n = util.pconvert(fp[striplen:])
424 n = util.pconvert(fp[striplen:])
417 l.append((decodedir(n), n, st.st_size))
425 l.append((decodedir(n), n, st.st_size))
418 elif kind == stat.S_IFDIR and recurse:
426 elif kind == stat.S_IFDIR and recurse:
419 visit.append(fp)
427 visit.append(fp)
420 l.sort()
428 l.sort()
421 return l
429 return l
422
430
423 def changelog(self, trypending):
431 def changelog(self, trypending):
424 return changelog.changelog(self.vfs, trypending=trypending)
432 return changelog.changelog(self.vfs, trypending=trypending)
425
433
426 def manifestlog(self, repo, storenarrowmatch):
434 def manifestlog(self, repo, storenarrowmatch):
427 rootstore = manifest.manifestrevlog(self.vfs)
435 rootstore = manifest.manifestrevlog(self.vfs)
428 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
436 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
429
437
430 def datafiles(self, matcher=None):
438 def datafiles(self, matcher=None):
431 return self._walk(b'data', True) + self._walk(b'meta', True)
439 return self._walk(b'data', True) + self._walk(b'meta', True)
432
440
433 def topfiles(self):
441 def topfiles(self):
434 # yield manifest before changelog
442 # yield manifest before changelog
435 return reversed(self._walk(b'', False))
443 return reversed(self._walk(b'', False))
436
444
437 def walk(self, matcher=None):
445 def walk(self, matcher=None):
438 '''yields (unencoded, encoded, size)
446 '''yields (unencoded, encoded, size)
439
447
440 if a matcher is passed, only the storage files of paths tracked and
448 if a matcher is passed, only the storage files of paths tracked and
441 matched by the matcher are yielded
449 matched by the matcher are yielded
442 '''
450 '''
443 # yield data files first
451 # yield data files first
444 for x in self.datafiles(matcher):
452 for x in self.datafiles(matcher):
445 yield x
453 yield x
446 for x in self.topfiles():
454 for x in self.topfiles():
447 yield x
455 yield x
448
456
449 def copylist(self):
457 def copylist(self):
450 return [b'requires'] + _data.split()
458 return [b'requires'] + _data
451
459
452 def write(self, tr):
460 def write(self, tr):
453 pass
461 pass
454
462
455 def invalidatecaches(self):
463 def invalidatecaches(self):
456 pass
464 pass
457
465
458 def markremoved(self, fn):
466 def markremoved(self, fn):
459 pass
467 pass
460
468
461 def __contains__(self, path):
469 def __contains__(self, path):
462 '''Checks if the store contains path'''
470 '''Checks if the store contains path'''
463 path = b"/".join((b"data", path))
471 path = b"/".join((b"data", path))
464 # file?
472 # file?
465 if self.vfs.exists(path + b".i"):
473 if self.vfs.exists(path + b".i"):
466 return True
474 return True
467 # dir?
475 # dir?
468 if not path.endswith(b"/"):
476 if not path.endswith(b"/"):
469 path = path + b"/"
477 path = path + b"/"
470 return self.vfs.exists(path)
478 return self.vfs.exists(path)
471
479
472
480
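# Usage sketch (not part of store.py); `repo` is a hypothetical, already
# opened localrepository. walk() yields (unencoded, encoded, size) triples:
# the data/meta revlogs first, then the top-level files, with the manifest
# before the changelog.
def storesize(repo):
    total = 0
    for unencoded, encoded, size in repo.store.walk():
        total += size
    return total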
473 class encodedstore(basicstore):
481 class encodedstore(basicstore):
474 def __init__(self, path, vfstype):
482 def __init__(self, path, vfstype):
475 vfs = vfstype(path + b'/store')
483 vfs = vfstype(path + b'/store')
476 self.path = vfs.base
484 self.path = vfs.base
477 self.createmode = _calcmode(vfs)
485 self.createmode = _calcmode(vfs)
478 vfs.createmode = self.createmode
486 vfs.createmode = self.createmode
479 self.rawvfs = vfs
487 self.rawvfs = vfs
480 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
488 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
481 self.opener = self.vfs
489 self.opener = self.vfs
482
490
483 def datafiles(self, matcher=None):
491 def datafiles(self, matcher=None):
484 for a, b, size in super(encodedstore, self).datafiles():
492 for a, b, size in super(encodedstore, self).datafiles():
485 try:
493 try:
486 a = decodefilename(a)
494 a = decodefilename(a)
487 except KeyError:
495 except KeyError:
488 a = None
496 a = None
489 if a is not None and not _matchtrackedpath(a, matcher):
497 if a is not None and not _matchtrackedpath(a, matcher):
490 continue
498 continue
491 yield a, b, size
499 yield a, b, size
492
500
493 def join(self, f):
501 def join(self, f):
494 return self.path + b'/' + encodefilename(f)
502 return self.path + b'/' + encodefilename(f)
495
503
496 def copylist(self):
504 def copylist(self):
497 return [b'requires', b'00changelog.i'] + [
498 b'store/' + f for f in _data.split()
499 ]
500
505 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
506
501
507
502 class fncache(object):
508 class fncache(object):
503 # the filename used to be partially encoded
509 # the filename used to be partially encoded
504 # hence the encodedir/decodedir dance
510 # hence the encodedir/decodedir dance
505 def __init__(self, vfs):
511 def __init__(self, vfs):
506 self.vfs = vfs
512 self.vfs = vfs
507 self.entries = None
513 self.entries = None
508 self._dirty = False
514 self._dirty = False
509 # set of new additions to fncache
515 # set of new additions to fncache
510 self.addls = set()
516 self.addls = set()
511
517
512 def ensureloaded(self, warn=None):
518 def ensureloaded(self, warn=None):
513 '''read the fncache file if not already read.
519 '''read the fncache file if not already read.
514
520
515 If the file on disk is corrupted, raise. If warn is provided,
521 If the file on disk is corrupted, raise. If warn is provided,
516 warn and keep going instead.'''
522 warn and keep going instead.'''
517 if self.entries is None:
523 if self.entries is None:
518 self._load(warn)
524 self._load(warn)
519
525
520 def _load(self, warn=None):
526 def _load(self, warn=None):
521 '''fill the entries from the fncache file'''
527 '''fill the entries from the fncache file'''
522 self._dirty = False
528 self._dirty = False
523 try:
529 try:
524 fp = self.vfs(b'fncache', mode=b'rb')
530 fp = self.vfs(b'fncache', mode=b'rb')
525 except IOError:
531 except IOError:
526 # skip nonexistent file
532 # skip nonexistent file
527 self.entries = set()
533 self.entries = set()
528 return
534 return
529
535
530 self.entries = set()
536 self.entries = set()
531 chunk = b''
537 chunk = b''
532 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
538 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
533 chunk += c
539 chunk += c
534 try:
540 try:
535 p = chunk.rindex(b'\n')
541 p = chunk.rindex(b'\n')
536 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
542 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
537 chunk = chunk[p + 1 :]
543 chunk = chunk[p + 1 :]
538 except ValueError:
544 except ValueError:
539 # substring '\n' not found, maybe the entry is bigger than the
545 # substring '\n' not found, maybe the entry is bigger than the
540 # chunksize, so let's keep iterating
546 # chunksize, so let's keep iterating
541 pass
547 pass
542
548
543 if chunk:
549 if chunk:
544 msg = _(b"fncache does not ends with a newline")
550 msg = _(b"fncache does not ends with a newline")
545 if warn:
551 if warn:
546 warn(msg + b'\n')
552 warn(msg + b'\n')
547 else:
553 else:
548 raise error.Abort(
554 raise error.Abort(
549 msg,
555 msg,
550 hint=_(
556 hint=_(
551 b"use 'hg debugrebuildfncache' to "
557 b"use 'hg debugrebuildfncache' to "
552 b"rebuild the fncache"
558 b"rebuild the fncache"
553 ),
559 ),
554 )
560 )
555 self._checkentries(fp, warn)
561 self._checkentries(fp, warn)
556 fp.close()
562 fp.close()
557
563
558 def _checkentries(self, fp, warn):
564 def _checkentries(self, fp, warn):
559 """ make sure there is no empty string in entries """
565 """ make sure there is no empty string in entries """
560 if b'' in self.entries:
566 if b'' in self.entries:
561 fp.seek(0)
567 fp.seek(0)
562 for n, line in enumerate(util.iterfile(fp)):
568 for n, line in enumerate(util.iterfile(fp)):
563 if not line.rstrip(b'\n'):
569 if not line.rstrip(b'\n'):
564 t = _(b'invalid entry in fncache, line %d') % (n + 1)
570 t = _(b'invalid entry in fncache, line %d') % (n + 1)
565 if warn:
571 if warn:
566 warn(t + b'\n')
572 warn(t + b'\n')
567 else:
573 else:
568 raise error.Abort(t)
574 raise error.Abort(t)
569
575
570 def write(self, tr):
576 def write(self, tr):
571 if self._dirty:
577 if self._dirty:
572 assert self.entries is not None
578 assert self.entries is not None
573 self.entries = self.entries | self.addls
579 self.entries = self.entries | self.addls
574 self.addls = set()
580 self.addls = set()
575 tr.addbackup(b'fncache')
581 tr.addbackup(b'fncache')
576 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
582 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
577 if self.entries:
583 if self.entries:
578 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
584 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
579 fp.close()
585 fp.close()
580 self._dirty = False
586 self._dirty = False
581 if self.addls:
587 if self.addls:
582 # if we have just new entries, let's append them to the fncache
588 # if we have just new entries, let's append them to the fncache
583 tr.addbackup(b'fncache')
589 tr.addbackup(b'fncache')
584 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
590 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
585 if self.addls:
591 if self.addls:
586 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
592 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
587 fp.close()
593 fp.close()
588 self.entries = None
594 self.entries = None
589 self.addls = set()
595 self.addls = set()
590
596
591 def add(self, fn):
597 def add(self, fn):
592 if self.entries is None:
598 if self.entries is None:
593 self._load()
599 self._load()
594 if fn not in self.entries:
600 if fn not in self.entries:
595 self.addls.add(fn)
601 self.addls.add(fn)
596
602
597 def remove(self, fn):
603 def remove(self, fn):
598 if self.entries is None:
604 if self.entries is None:
599 self._load()
605 self._load()
600 if fn in self.addls:
606 if fn in self.addls:
601 self.addls.remove(fn)
607 self.addls.remove(fn)
602 return
608 return
603 try:
609 try:
604 self.entries.remove(fn)
610 self.entries.remove(fn)
605 self._dirty = True
611 self._dirty = True
606 except KeyError:
612 except KeyError:
607 pass
613 pass
608
614
609 def __contains__(self, fn):
615 def __contains__(self, fn):
610 if fn in self.addls:
616 if fn in self.addls:
611 return True
617 return True
612 if self.entries is None:
618 if self.entries is None:
613 self._load()
619 self._load()
614 return fn in self.entries
620 return fn in self.entries
615
621
616 def __iter__(self):
622 def __iter__(self):
617 if self.entries is None:
623 if self.entries is None:
618 self._load()
624 self._load()
619 return iter(self.entries | self.addls)
625 return iter(self.entries | self.addls)
620
626
621
627
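# Usage sketch (not part of store.py): additions are queued in addls and
# appended on write(); only removals mark the cache dirty and force a full
# rewrite. The store path below is hypothetical.
from mercurial import store, vfs as vfsmod

fnc = store.fncache(vfsmod.vfs(b'/path/to/repo/.hg/store'))
fnc.add(b'data/newfile.i')         # queued in fnc.addls, nothing on disk yet
assert b'data/newfile.i' in fnc    # __contains__ consults addls too
# fnc.write(tr) would then append the entry under an open transaction `tr`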
622 class _fncachevfs(vfsmod.proxyvfs):
628 class _fncachevfs(vfsmod.proxyvfs):
623 def __init__(self, vfs, fnc, encode):
629 def __init__(self, vfs, fnc, encode):
624 vfsmod.proxyvfs.__init__(self, vfs)
630 vfsmod.proxyvfs.__init__(self, vfs)
625 self.fncache = fnc
631 self.fncache = fnc
626 self.encode = encode
632 self.encode = encode
627
633
628 def __call__(self, path, mode=b'r', *args, **kw):
634 def __call__(self, path, mode=b'r', *args, **kw):
629 encoded = self.encode(path)
635 encoded = self.encode(path)
630 if mode not in (b'r', b'rb') and (
636 if mode not in (b'r', b'rb') and (
631 path.startswith(b'data/') or path.startswith(b'meta/')
637 path.startswith(b'data/') or path.startswith(b'meta/')
632 ):
638 ):
633 # do not trigger a fncache load when adding a file that already is
639 # do not trigger a fncache load when adding a file that already is
634 # known to exist.
640 # known to exist.
635 notload = self.fncache.entries is None and self.vfs.exists(encoded)
641 notload = self.fncache.entries is None and self.vfs.exists(encoded)
636 if notload and b'a' in mode and not self.vfs.stat(encoded).st_size:
642 if notload and b'a' in mode and not self.vfs.stat(encoded).st_size:
637 # when appending to an existing file, if the file has size zero,
643 # when appending to an existing file, if the file has size zero,
638 # it should be considered as missing. Such zero-size files are
644 # it should be considered as missing. Such zero-size files are
639 # the result of truncation when a transaction is aborted.
645 # the result of truncation when a transaction is aborted.
640 notload = False
646 notload = False
641 if not notload:
647 if not notload:
642 self.fncache.add(path)
648 self.fncache.add(path)
643 return self.vfs(encoded, mode, *args, **kw)
649 return self.vfs(encoded, mode, *args, **kw)
644
650
645 def join(self, path):
651 def join(self, path):
646 if path:
652 if path:
647 return self.vfs.join(self.encode(path))
653 return self.vfs.join(self.encode(path))
648 else:
654 else:
649 return self.vfs.join(path)
655 return self.vfs.join(path)
650
656
651
657
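# Sketch (not part of store.py): writes going through the fncache-aware vfs
# register the unencoded name automatically. `repo` is a hypothetical
# repository using fncachestore.
with repo.store.vfs(b'data/some/new/file.i', b'ab') as fp:
    fp.write(b'')
# b'data/some/new/file.i' is now queued in repo.store.fncache.addls and is
# persisted by the next store.write(tr)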
652 class fncachestore(basicstore):
658 class fncachestore(basicstore):
653 def __init__(self, path, vfstype, dotencode):
659 def __init__(self, path, vfstype, dotencode):
654 if dotencode:
660 if dotencode:
655 encode = _pathencode
661 encode = _pathencode
656 else:
662 else:
657 encode = _plainhybridencode
663 encode = _plainhybridencode
658 self.encode = encode
664 self.encode = encode
659 vfs = vfstype(path + b'/store')
665 vfs = vfstype(path + b'/store')
660 self.path = vfs.base
666 self.path = vfs.base
661 self.pathsep = self.path + b'/'
667 self.pathsep = self.path + b'/'
662 self.createmode = _calcmode(vfs)
668 self.createmode = _calcmode(vfs)
663 vfs.createmode = self.createmode
669 vfs.createmode = self.createmode
664 self.rawvfs = vfs
670 self.rawvfs = vfs
665 fnc = fncache(vfs)
671 fnc = fncache(vfs)
666 self.fncache = fnc
672 self.fncache = fnc
667 self.vfs = _fncachevfs(vfs, fnc, encode)
673 self.vfs = _fncachevfs(vfs, fnc, encode)
668 self.opener = self.vfs
674 self.opener = self.vfs
669
675
670 def join(self, f):
676 def join(self, f):
671 return self.pathsep + self.encode(f)
677 return self.pathsep + self.encode(f)
672
678
673 def getsize(self, path):
679 def getsize(self, path):
674 return self.rawvfs.stat(path).st_size
680 return self.rawvfs.stat(path).st_size
675
681
676 def datafiles(self, matcher=None):
682 def datafiles(self, matcher=None):
677 for f in sorted(self.fncache):
683 for f in sorted(self.fncache):
678 if not _matchtrackedpath(f, matcher):
684 if not _matchtrackedpath(f, matcher):
679 continue
685 continue
680 ef = self.encode(f)
686 ef = self.encode(f)
681 try:
687 try:
682 yield f, ef, self.getsize(ef)
688 yield f, ef, self.getsize(ef)
683 except OSError as err:
689 except OSError as err:
684 if err.errno != errno.ENOENT:
690 if err.errno != errno.ENOENT:
685 raise
691 raise
686
692
687 def copylist(self):
693 def copylist(self):
688 d = (
689 b'bookmarks narrowspec data meta dh fncache phaseroots obsstore'
690 b' 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
691 )
692 return [b'requires', b'00changelog.i'] + [
693 b'store/' + f for f in d.split()
694 ]
695
694 d = (
695 b'bookmarks',
696 b'narrowspec',
697 b'data',
698 b'meta',
699 b'dh',
700 b'fncache',
701 b'phaseroots',
702 b'obsstore',
703 b'00manifest.d',
704 b'00manifest.i',
705 b'00changelog.d',
706 b'00changelog.i',
707 )
708 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
709
696 def write(self, tr):
710 def write(self, tr):
697 self.fncache.write(tr)
711 self.fncache.write(tr)
698
712
699 def invalidatecaches(self):
713 def invalidatecaches(self):
700 self.fncache.entries = None
714 self.fncache.entries = None
701 self.fncache.addls = set()
715 self.fncache.addls = set()
702
716
703 def markremoved(self, fn):
717 def markremoved(self, fn):
704 self.fncache.remove(fn)
718 self.fncache.remove(fn)
705
719
706 def _exists(self, f):
720 def _exists(self, f):
707 ef = self.encode(f)
721 ef = self.encode(f)
708 try:
722 try:
709 self.getsize(ef)
723 self.getsize(ef)
710 return True
724 return True
711 except OSError as err:
725 except OSError as err:
712 if err.errno != errno.ENOENT:
726 if err.errno != errno.ENOENT:
713 raise
727 raise
714 # nonexistent entry
728 # nonexistent entry
715 return False
729 return False
716
730
717 def __contains__(self, path):
731 def __contains__(self, path):
718 '''Checks if the store contains path'''
732 '''Checks if the store contains path'''
719 path = b"/".join((b"data", path))
733 path = b"/".join((b"data", path))
720 # check for files (exact match)
734 # check for files (exact match)
721 e = path + b'.i'
735 e = path + b'.i'
722 if e in self.fncache and self._exists(e):
736 if e in self.fncache and self._exists(e):
723 return True
737 return True
724 # now check for directories (prefix match)
738 # now check for directories (prefix match)
725 if not path.endswith(b'/'):
739 if not path.endswith(b'/'):
726 path += b'/'
740 path += b'/'
727 for e in self.fncache:
741 for e in self.fncache:
728 if e.startswith(path) and self._exists(e):
742 if e.startswith(path) and self._exists(e):
729 return True
743 return True
730 return False
744 return False
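# Usage sketch (not part of store.py); `repo` is a hypothetical repository
# backed by fncachestore. Membership covers exact files and directory
# prefixes, via the fncache plus a stat of the revlog on disk.
def hasstoredata(repo, path):
    return path in repo.store   # e.g. b'README' or b'src/'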