store: lazily get file size on demand for the fncache case...
author:    marmoute
changeset: r51370:4cbdfab6 (branch: default)
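The diff below does two things in `mercurial/store.py`: `StoreFile.file_size` stops being a plain attribute that is filled in eagerly and becomes a method that stats the file through a vfs the first time the size is requested, and `fncachestore.datafiles` stops calling `self.getsize(ef)` (and catching `FileNotFoundError`) while enumerating the fncache. Here is a minimal, self-contained sketch of that lazy pattern, assuming nothing about Mercurial's internals; `PlainVfs`, `LazyStoreFile`, and the sample path are illustrative names only, not Mercurial APIs:

```python
import os


class PlainVfs:
    """Illustrative stand-in for Mercurial's vfs: resolves a relative
    path against a base directory and stats it."""

    def __init__(self, base):
        self.base = base

    def stat(self, path):
        return os.stat(os.path.join(self.base, path))


class LazyStoreFile:
    """Sketch of the new StoreFile behaviour: a size computed during a
    directory walk can still be injected eagerly; otherwise the file is
    stat'ed only when a consumer asks for its size."""

    def __init__(self, unencoded_path, file_size=None):
        self.unencoded_path = unencoded_path
        self._file_size = file_size

    def file_size(self, vfs):
        if self._file_size is not None:
            return self._file_size  # size was known up front
        try:
            # stat on demand; a file that vanished since listing counts
            # as empty instead of being silently dropped from the walk
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0


# usage: sizes are only computed for the files actually queried
vfs = PlainVfs('/tmp')
f = LazyStoreFile('some-file')  # hypothetical path
print(f.file_size(vfs))         # 0 if /tmp/some-file does not exist
```

The payoff is that listing the store no longer costs one `stat()` per fncache entry; callers that never look at a size never pay for it.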
--- a/mercurial/store.py
+++ b/mercurial/store.py
@@ -1,963 +1,967 @@
 # store.py - repository store handling for Mercurial
 #
 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.


 import functools
 import os
 import re
 import stat
 from typing import Generator

 from .i18n import _
 from .pycompat import getattr
 from .thirdparty import attr
 from .node import hex
 from . import (
     changelog,
     error,
     manifest,
     policy,
     pycompat,
     util,
     vfs as vfsmod,
 )
 from .utils import hashutil

 parsers = policy.importmod('parsers')
 # how much bytes should be read from fncache in one read
 # It is done to prevent loading large fncache files into memory
 fncache_chunksize = 10 ** 6


 def _matchtrackedpath(path, matcher):
     """parses a fncache entry and returns whether the entry is tracking a path
     matched by matcher or not.

     If matcher is None, returns True"""

     if matcher is None:
         return True
     path = decodedir(path)
     if path.startswith(b'data/'):
         return matcher(path[len(b'data/') : -len(b'.i')])
     elif path.startswith(b'meta/'):
         return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])

     raise error.ProgrammingError(b"cannot decode path %s" % path)


 # This avoids a collision between a file named foo and a dir named
 # foo.i or foo.d
 def _encodedir(path):
     """
     >>> _encodedir(b'data/foo.i')
     'data/foo.i'
     >>> _encodedir(b'data/foo.i/bla.i')
     'data/foo.i.hg/bla.i'
     >>> _encodedir(b'data/foo.i.hg/bla.i')
     'data/foo.i.hg.hg/bla.i'
     >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
     'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
     """
     return (
         path.replace(b".hg/", b".hg.hg/")
         .replace(b".i/", b".i.hg/")
         .replace(b".d/", b".d.hg/")
     )


 encodedir = getattr(parsers, 'encodedir', _encodedir)


 def decodedir(path):
     """
     >>> decodedir(b'data/foo.i')
     'data/foo.i'
     >>> decodedir(b'data/foo.i.hg/bla.i')
     'data/foo.i/bla.i'
     >>> decodedir(b'data/foo.i.hg.hg/bla.i')
     'data/foo.i.hg/bla.i'
     """
     if b".hg/" not in path:
         return path
     return (
         path.replace(b".d.hg/", b".d/")
         .replace(b".i.hg/", b".i/")
         .replace(b".hg.hg/", b".hg/")
     )


 def _reserved():
     """characters that are problematic for filesystems

     * ascii escapes (0..31)
     * ascii hi (126..255)
     * windows specials

     these characters will be escaped by encodefunctions
     """
     winreserved = [ord(x) for x in u'\\:*?"<>|']
     for x in range(32):
         yield x
     for x in range(126, 256):
         yield x
     for x in winreserved:
         yield x


 def _buildencodefun():
     """
     >>> enc, dec = _buildencodefun()

     >>> enc(b'nothing/special.txt')
     'nothing/special.txt'
     >>> dec(b'nothing/special.txt')
     'nothing/special.txt'

     >>> enc(b'HELLO')
     '_h_e_l_l_o'
     >>> dec(b'_h_e_l_l_o')
     'HELLO'

     >>> enc(b'hello:world?')
     'hello~3aworld~3f'
     >>> dec(b'hello~3aworld~3f')
     'hello:world?'

     >>> enc(b'the\\x07quick\\xADshot')
     'the~07quick~adshot'
     >>> dec(b'the~07quick~adshot')
     'the\\x07quick\\xadshot'
     """
     e = b'_'
     xchr = pycompat.bytechr
     asciistr = list(map(xchr, range(127)))
     capitals = list(range(ord(b"A"), ord(b"Z") + 1))

     cmap = {x: x for x in asciistr}
     for x in _reserved():
         cmap[xchr(x)] = b"~%02x" % x
     for x in capitals + [ord(e)]:
         cmap[xchr(x)] = e + xchr(x).lower()

     dmap = {}
     for k, v in cmap.items():
         dmap[v] = k

     def decode(s):
         i = 0
         while i < len(s):
             for l in range(1, 4):
                 try:
                     yield dmap[s[i : i + l]]
                     i += l
                     break
                 except KeyError:
                     pass
             else:
                 raise KeyError

     return (
         lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
         lambda s: b''.join(list(decode(s))),
     )


 _encodefname, _decodefname = _buildencodefun()


 def encodefilename(s):
     """
     >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
     'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
     """
     return _encodefname(encodedir(s))


 def decodefilename(s):
     """
     >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
     'foo.i/bar.d/bla.hg/hi:world?/HELLO'
     """
     return decodedir(_decodefname(s))


 def _buildlowerencodefun():
     """
     >>> f = _buildlowerencodefun()
     >>> f(b'nothing/special.txt')
     'nothing/special.txt'
     >>> f(b'HELLO')
     'hello'
     >>> f(b'hello:world?')
     'hello~3aworld~3f'
     >>> f(b'the\\x07quick\\xADshot')
     'the~07quick~adshot'
     """
     xchr = pycompat.bytechr
     cmap = {xchr(x): xchr(x) for x in range(127)}
     for x in _reserved():
         cmap[xchr(x)] = b"~%02x" % x
     for x in range(ord(b"A"), ord(b"Z") + 1):
         cmap[xchr(x)] = xchr(x).lower()

     def lowerencode(s):
         return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

     return lowerencode


 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
 _winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
 _winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


 def _auxencode(path, dotencode):
     """
     Encodes filenames containing names reserved by Windows or which end in
     period or space. Does not touch other single reserved characters c.
     Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
     Additionally encodes space or period at the beginning, if dotencode is
     True. Parameter path is assumed to be all lowercase.
     A segment only needs encoding if a reserved name appears as a
     basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
     doesn't need encoding.

     >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
     >>> _auxencode(s.split(b'/'), True)
     ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
     >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
     >>> _auxencode(s.split(b'/'), False)
     ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
     >>> _auxencode([b'foo. '], True)
     ['foo.~20']
     >>> _auxencode([b' .foo'], True)
     ['~20.foo']
     """
     for i, n in enumerate(path):
         if not n:
             continue
         if dotencode and n[0] in b'. ':
             n = b"~%02x" % ord(n[0:1]) + n[1:]
             path[i] = n
         else:
             l = n.find(b'.')
             if l == -1:
                 l = len(n)
             if (l == 3 and n[:3] in _winres3) or (
                 l == 4
                 and n[3:4] <= b'9'
                 and n[3:4] >= b'1'
                 and n[:3] in _winres4
             ):
                 # encode third letter ('aux' -> 'au~78')
                 ec = b"~%02x" % ord(n[2:3])
                 n = n[0:2] + ec + n[3:]
                 path[i] = n
         if n[-1] in b'. ':
             # encode last period or space ('foo...' -> 'foo..~2e')
             path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
     return path


 _maxstorepathlen = 120
 _dirprefixlen = 8
 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


 def _hashencode(path, dotencode):
     digest = hex(hashutil.sha1(path).digest())
     le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
     parts = _auxencode(le, dotencode)
     basename = parts[-1]
     _root, ext = os.path.splitext(basename)
     sdirs = []
     sdirslen = 0
     for p in parts[:-1]:
         d = p[:_dirprefixlen]
         if d[-1] in b'. ':
             # Windows can't access dirs ending in period or space
             d = d[:-1] + b'_'
         if sdirslen == 0:
             t = len(d)
         else:
             t = sdirslen + 1 + len(d)
             if t > _maxshortdirslen:
                 break
         sdirs.append(d)
         sdirslen = t
     dirs = b'/'.join(sdirs)
     if len(dirs) > 0:
         dirs += b'/'
     res = b'dh/' + dirs + digest + ext
     spaceleft = _maxstorepathlen - len(res)
     if spaceleft > 0:
         filler = basename[:spaceleft]
         res = b'dh/' + dirs + filler + digest + ext
     return res


 def _hybridencode(path, dotencode):
     """encodes path with a length limit

     Encodes all paths that begin with 'data/', according to the following.

     Default encoding (reversible):

     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
     characters are encoded as '~xx', where xx is the two digit hex code
     of the character (see encodefilename).
     Relevant path components consisting of Windows reserved filenames are
     masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

     Hashed encoding (not reversible):

     If the default-encoded path is longer than _maxstorepathlen, a
     non-reversible hybrid hashing of the path is done instead.
     This encoding uses up to _dirprefixlen characters of all directory
     levels of the lowerencoded path, but not more levels than can fit into
     _maxshortdirslen.
     Then follows the filler followed by the sha digest of the full path.
     The filler is the beginning of the basename of the lowerencoded path
     (the basename is everything after the last path separator). The filler
     is as long as possible, filling in characters from the basename until
     the encoded path has _maxstorepathlen characters (or all chars of the
     basename have been taken).
     The extension (e.g. '.i' or '.d') is preserved.

     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
     encoding was used.
     """
     path = encodedir(path)
     ef = _encodefname(path).split(b'/')
     res = b'/'.join(_auxencode(ef, dotencode))
     if len(res) > _maxstorepathlen:
         res = _hashencode(path, dotencode)
     return res


 def _pathencode(path):
     de = encodedir(path)
     if len(path) > _maxstorepathlen:
         return _hashencode(de, True)
     ef = _encodefname(de).split(b'/')
     res = b'/'.join(_auxencode(ef, True))
     if len(res) > _maxstorepathlen:
         return _hashencode(de, True)
     return res


 _pathencode = getattr(parsers, 'pathencode', _pathencode)


 def _plainhybridencode(f):
     return _hybridencode(f, False)


 def _calcmode(vfs):
     try:
         # files in .hg/ will be created using this mode
         mode = vfs.stat().st_mode
         # avoid some useless chmods
         if (0o777 & ~util.umask) == (0o777 & mode):
             mode = None
     except OSError:
         mode = None
     return mode


 _data = [
     b'bookmarks',
     b'narrowspec',
     b'data',
     b'meta',
     b'00manifest.d',
     b'00manifest.i',
     b'00changelog.d',
     b'00changelog.i',
     b'phaseroots',
     b'obsstore',
     b'requires',
 ]

 REVLOG_FILES_MAIN_EXT = (b'.i',)
 REVLOG_FILES_OTHER_EXT = (
     b'.idx',
     b'.d',
     b'.dat',
     b'.n',
     b'.nd',
     b'.sda',
 )
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" file are nodemap data and won't "change" but they might be
 # deleted.
 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

 # some exception to the above matching
 #
 # XXX This is currently not in use because of issue6542
 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


 def is_revlog(f, kind, st):
     if kind != stat.S_IFREG:
         return None
     return revlog_type(f)


 def revlog_type(f):
     # XXX we need to filter `undo.` created by the transaction here, however
     # being naive about it also filter revlog for `undo.*` files, leading to
     # issue6542. So we no longer use EXCLUDED.
     if f.endswith(REVLOG_FILES_MAIN_EXT):
         return FILEFLAGS_REVLOG_MAIN
     elif f.endswith(REVLOG_FILES_OTHER_EXT):
         t = FILETYPE_FILELOG_OTHER
         if f.endswith(REVLOG_FILES_VOLATILE_EXT):
             t |= FILEFLAGS_VOLATILE
         return t
     return None


 # the file is part of changelog data
 FILEFLAGS_CHANGELOG = 1 << 13
 # the file is part of manifest data
 FILEFLAGS_MANIFESTLOG = 1 << 12
 # the file is part of filelog data
 FILEFLAGS_FILELOG = 1 << 11
 # file that are not directly part of a revlog
 FILEFLAGS_OTHER = 1 << 10

 # the main entry point for a revlog
 FILEFLAGS_REVLOG_MAIN = 1 << 1
 # a secondary file for a revlog
 FILEFLAGS_REVLOG_OTHER = 1 << 0

 # files that are "volatile" and might change between listing and streaming
 FILEFLAGS_VOLATILE = 1 << 20

 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_OTHER = FILEFLAGS_OTHER


 @attr.s(slots=True, init=False)
 class BaseStoreEntry:
     """An entry in the store

     This is returned by `store.walk` and represent some data in the store."""

     unencoded_path = attr.ib()
     _is_volatile = attr.ib(default=False)
     _file_size = attr.ib(default=None)

     def __init__(
         self,
         unencoded_path,
         is_volatile=False,
         file_size=None,
     ):
         self.unencoded_path = unencoded_path
         self._is_volatile = is_volatile
         self._file_size = file_size

     def files(self):
         return [
             StoreFile(
                 unencoded_path=self.unencoded_path,
                 file_size=self._file_size,
                 is_volatile=self._is_volatile,
             )
         ]


 @attr.s(slots=True, init=False)
 class SimpleStoreEntry(BaseStoreEntry):
     """A generic entry in the store"""

     is_revlog = False


 @attr.s(slots=True, init=False)
 class RevlogStoreEntry(BaseStoreEntry):
     """A revlog entry in the store"""

     is_revlog = True
     revlog_type = attr.ib(default=None)
     is_revlog_main = attr.ib(default=None)

     def __init__(
         self,
         unencoded_path,
         revlog_type,
         is_revlog_main=False,
         is_volatile=False,
         file_size=None,
     ):
         super().__init__(
             unencoded_path=unencoded_path,
             is_volatile=is_volatile,
             file_size=file_size,
         )
         self.revlog_type = revlog_type
         self.is_revlog_main = is_revlog_main


 @attr.s(slots=True)
 class StoreFile:
     """a file matching an entry"""

     unencoded_path = attr.ib()
-    file_size = attr.ib()
+    _file_size = attr.ib(default=False)
     is_volatile = attr.ib(default=False)

+    def file_size(self, vfs):
+        if self._file_size is not None:
+            return self._file_size
+        try:
+            return vfs.stat(self.unencoded_path).st_size
+        except FileNotFoundError:
+            return 0
+

 class basicstore:
     '''base class for local repository stores'''

     def __init__(self, path, vfstype):
         vfs = vfstype(path)
         self.path = vfs.base
         self.createmode = _calcmode(vfs)
         vfs.createmode = self.createmode
         self.rawvfs = vfs
         self.vfs = vfsmod.filtervfs(vfs, encodedir)
         self.opener = self.vfs

     def join(self, f):
         return self.path + b'/' + encodedir(f)

     def _walk(self, relpath, recurse):
         '''yields (revlog_type, unencoded, size)'''
         path = self.path
         if relpath:
             path += b'/' + relpath
         striplen = len(self.path) + 1
         l = []
         if self.rawvfs.isdir(path):
             visit = [path]
             readdir = self.rawvfs.readdir
             while visit:
                 p = visit.pop()
                 for f, kind, st in readdir(p, stat=True):
                     fp = p + b'/' + f
                     rl_type = is_revlog(f, kind, st)
                     if rl_type is not None:
                         n = util.pconvert(fp[striplen:])
                         l.append((rl_type, decodedir(n), st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         l.sort()
         return l

     def changelog(self, trypending, concurrencychecker=None):
         return changelog.changelog(
             self.vfs,
             trypending=trypending,
             concurrencychecker=concurrencychecker,
         )

     def manifestlog(self, repo, storenarrowmatch):
         rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
         return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

     def datafiles(
         self, matcher=None, undecodable=None
     ) -> Generator[BaseStoreEntry, None, None]:
         """Like walk, but excluding the changelog and root manifest.

         When [undecodable] is None, revlogs names that can't be
         decoded cause an exception. When it is provided, it should
         be a list and the filenames that can't be decoded are added
         to it instead. This is very rarely needed."""
         files = self._walk(b'data', True) + self._walk(b'meta', True)
         for (t, u, s) in files:
             if t is not None:
                 yield RevlogStoreEntry(
                     unencoded_path=u,
                     revlog_type=FILEFLAGS_FILELOG,
                     is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                     is_volatile=bool(t & FILEFLAGS_VOLATILE),
                     file_size=s,
                 )

     def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
         # yield manifest before changelog
         files = reversed(self._walk(b'', False))
         for (t, u, s) in files:
             if u.startswith(b'00changelog'):
                 yield RevlogStoreEntry(
                     unencoded_path=u,
                     revlog_type=FILEFLAGS_CHANGELOG,
                     is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                     is_volatile=bool(t & FILEFLAGS_VOLATILE),
                     file_size=s,
                 )
             elif u.startswith(b'00manifest'):
                 yield RevlogStoreEntry(
                     unencoded_path=u,
                     revlog_type=FILEFLAGS_MANIFESTLOG,
                     is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                     is_volatile=bool(t & FILEFLAGS_VOLATILE),
                     file_size=s,
                 )
             else:
                 yield SimpleStoreEntry(
                     unencoded_path=u,
                     is_volatile=bool(t & FILEFLAGS_VOLATILE),
                     file_size=s,
                 )

     def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
         """return files related to data storage (ie: revlogs)

         yields (file_type, unencoded, size)

         if a matcher is passed, storage files of only those tracked paths
         are passed with matches the matcher
         """
         # yield data files first
         for x in self.datafiles(matcher):
             yield x
         for x in self.topfiles():
             yield x

     def copylist(self):
         return _data

     def write(self, tr):
         pass

     def invalidatecaches(self):
         pass

     def markremoved(self, fn):
         pass

     def __contains__(self, path):
         '''Checks if the store contains path'''
         path = b"/".join((b"data", path))
         # file?
         if self.vfs.exists(path + b".i"):
             return True
         # dir?
         if not path.endswith(b"/"):
             path = path + b"/"
         return self.vfs.exists(path)


 class encodedstore(basicstore):
     def __init__(self, path, vfstype):
         vfs = vfstype(path + b'/store')
         self.path = vfs.base
         self.createmode = _calcmode(vfs)
         vfs.createmode = self.createmode
         self.rawvfs = vfs
         self.vfs = vfsmod.filtervfs(vfs, encodefilename)
         self.opener = self.vfs

     # note: topfiles would also need a decode phase. It is just that in
     # practice we do not have any file outside of `data/` that needs encoding.
     # However that might change so we should probably add a test and encoding
     # decoding for it too. see issue6548

     def datafiles(
         self, matcher=None, undecodable=None
     ) -> Generator[BaseStoreEntry, None, None]:
         for entry in super(encodedstore, self).datafiles():
             try:
                 f1 = entry.unencoded_path
                 f2 = decodefilename(f1)
             except KeyError:
                 if undecodable is None:
                     msg = _(b'undecodable revlog name %s') % f1
                     raise error.StorageError(msg)
                 else:
                     undecodable.append(f1)
                     continue
             if not _matchtrackedpath(f2, matcher):
                 continue
             entry.unencoded_path = f2
             yield entry

     def join(self, f):
         return self.path + b'/' + encodefilename(f)

     def copylist(self):
         return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


 class fncache:
     # the filename used to be partially encoded
     # hence the encodedir/decodedir dance
     def __init__(self, vfs):
         self.vfs = vfs
         self._ignores = set()
         self.entries = None
         self._dirty = False
         # set of new additions to fncache
         self.addls = set()

     def ensureloaded(self, warn=None):
         """read the fncache file if not already read.

         If the file on disk is corrupted, raise. If warn is provided,
         warn and keep going instead."""
         if self.entries is None:
             self._load(warn)

     def _load(self, warn=None):
         '''fill the entries from the fncache file'''
         self._dirty = False
         try:
             fp = self.vfs(b'fncache', mode=b'rb')
         except IOError:
             # skip nonexistent file
             self.entries = set()
             return

         self.entries = set()
         chunk = b''
         for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
             chunk += c
             try:
                 p = chunk.rindex(b'\n')
                 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                 chunk = chunk[p + 1 :]
             except ValueError:
                 # substring '\n' not found, maybe the entry is bigger than the
                 # chunksize, so let's keep iterating
                 pass

         if chunk:
             msg = _(b"fncache does not ends with a newline")
             if warn:
                 warn(msg + b'\n')
             else:
                 raise error.Abort(
                     msg,
                     hint=_(
                         b"use 'hg debugrebuildfncache' to "
                         b"rebuild the fncache"
                     ),
                 )
         self._checkentries(fp, warn)
         fp.close()

     def _checkentries(self, fp, warn):
         """make sure there is no empty string in entries"""
         if b'' in self.entries:
             fp.seek(0)
             for n, line in enumerate(fp):
                 if not line.rstrip(b'\n'):
                     t = _(b'invalid entry in fncache, line %d') % (n + 1)
                     if warn:
                         warn(t + b'\n')
                     else:
                         raise error.Abort(t)

     def write(self, tr):
         if self._dirty:
             assert self.entries is not None
             self.entries = self.entries | self.addls
             self.addls = set()
             tr.addbackup(b'fncache')
             fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
             if self.entries:
                 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
             fp.close()
             self._dirty = False
         if self.addls:
             # if we have just new entries, let's append them to the fncache
             tr.addbackup(b'fncache')
             fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
             if self.addls:
                 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
             fp.close()
             self.entries = None
             self.addls = set()

     def addignore(self, fn):
         self._ignores.add(fn)

     def add(self, fn):
         if fn in self._ignores:
             return
         if self.entries is None:
             self._load()
         if fn not in self.entries:
             self.addls.add(fn)

     def remove(self, fn):
         if self.entries is None:
             self._load()
         if fn in self.addls:
             self.addls.remove(fn)
             return
         try:
             self.entries.remove(fn)
             self._dirty = True
         except KeyError:
             pass

     def __contains__(self, fn):
         if fn in self.addls:
             return True
         if self.entries is None:
             self._load()
         return fn in self.entries

     def __iter__(self):
         if self.entries is None:
             self._load()
         return iter(self.entries | self.addls)


 class _fncachevfs(vfsmod.proxyvfs):
     def __init__(self, vfs, fnc, encode):
         vfsmod.proxyvfs.__init__(self, vfs)
         self.fncache = fnc
         self.encode = encode

     def __call__(self, path, mode=b'r', *args, **kw):
         encoded = self.encode(path)
         if (
             mode not in (b'r', b'rb')
             and (path.startswith(b'data/') or path.startswith(b'meta/'))
             and revlog_type(path) is not None
         ):
             # do not trigger a fncache load when adding a file that already is
             # known to exist.
             notload = self.fncache.entries is None and self.vfs.exists(encoded)
             if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                 # when appending to an existing file, if the file has size zero,
                 # it should be considered as missing. Such zero-size files are
                 # the result of truncation when a transaction is aborted.
                 notload = False
             if not notload:
                 self.fncache.add(path)
         return self.vfs(encoded, mode, *args, **kw)

     def join(self, path):
         if path:
             return self.vfs.join(self.encode(path))
         else:
             return self.vfs.join(path)

     def register_file(self, path):
         """generic hook point to lets fncache steer its stew"""
         if path.startswith(b'data/') or path.startswith(b'meta/'):
             self.fncache.add(path)


 class fncachestore(basicstore):
     def __init__(self, path, vfstype, dotencode):
         if dotencode:
             encode = _pathencode
         else:
             encode = _plainhybridencode
         self.encode = encode
         vfs = vfstype(path + b'/store')
         self.path = vfs.base
         self.pathsep = self.path + b'/'
         self.createmode = _calcmode(vfs)
         vfs.createmode = self.createmode
         self.rawvfs = vfs
         fnc = fncache(vfs)
         self.fncache = fnc
         self.vfs = _fncachevfs(vfs, fnc, encode)
         self.opener = self.vfs

     def join(self, f):
         return self.pathsep + self.encode(f)

     def getsize(self, path):
         return self.rawvfs.stat(path).st_size

     def datafiles(
         self, matcher=None, undecodable=None
     ) -> Generator[BaseStoreEntry, None, None]:
         for f in sorted(self.fncache):
             if not _matchtrackedpath(f, matcher):
                 continue
             ef = self.encode(f)
             t = revlog_type(f)
             if t is None:
                 # Note: this should not be in the fncache then…
                 #
                 # However the fncache might contains such file added by
                 # previous version of Mercurial.
                 continue
-            try:
-                yield RevlogStoreEntry(
-                    unencoded_path=f,
-                    revlog_type=FILEFLAGS_FILELOG,
-                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
-                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
-                    file_size=self.getsize(ef),
-                )
-            except FileNotFoundError:
-                pass
+            yield RevlogStoreEntry(
+                unencoded_path=f,
+                revlog_type=FILEFLAGS_FILELOG,
+                is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+                is_volatile=bool(t & FILEFLAGS_VOLATILE),
+            )

     def copylist(self):
         d = (
             b'bookmarks',
             b'narrowspec',
             b'data',
             b'meta',
             b'dh',
             b'fncache',
             b'phaseroots',
             b'obsstore',
             b'00manifest.d',
             b'00manifest.i',
             b'00changelog.d',
             b'00changelog.i',
             b'requires',
         )
         return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

     def write(self, tr):
         self.fncache.write(tr)

     def invalidatecaches(self):
         self.fncache.entries = None
         self.fncache.addls = set()

     def markremoved(self, fn):
         self.fncache.remove(fn)

     def _exists(self, f):
         ef = self.encode(f)
         try:
             self.getsize(ef)
             return True
         except FileNotFoundError:
             return False

     def __contains__(self, path):
         '''Checks if the store contains path'''
         path = b"/".join((b"data", path))
         # check for files (exact match)
         e = path + b'.i'
         if e in self.fncache and self._exists(e):
             return True
         # now check for directories (prefix match)
         if not path.endswith(b'/'):
             path += b'/'
         for e in self.fncache:
             if e.startswith(path) and self._exists(e):
                 return True
         return False
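With the store.py change above, size lookups for fncache-backed stores move to the consumer side: anything iterating `store.walk()` or `datafiles()` is now expected to call `StoreFile.file_size(vfs)` with a vfs in hand, rather than reading a precomputed attribute. A hedged sketch of such a caller, assuming only the entry/file API visible in the diff; `total_store_size` is an illustrative helper, not a Mercurial function:

```python
def total_store_size(store):
    """Illustrative only: sum the on-disk size of every store file,
    stat'ing lazily through the store's vfs."""
    total = 0
    for entry in store.walk():
        for f in entry.files():
            # the stat happens here, on demand; files that disappeared
            # since listing (e.g. volatile ones) contribute 0
            total += f.file_size(store.vfs)
    return total
```

This is presumably why the companion diff to `mercurial/streamclone.py` follows: stream-clone code is a consumer of these sizes.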
--- a/mercurial/streamclone.py
+++ b/mercurial/streamclone.py
@@ -1,937 +1,939 @@
 # streamclone.py - producing and consuming streaming repository data
 #
 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.


 import contextlib
 import os
 import struct

 from .i18n import _
 from .pycompat import open
 from .interfaces import repository
 from . import (
     bookmarks,
     cacheutil,
     error,
     narrowspec,
     phases,
     pycompat,
     requirements as requirementsmod,
     scmutil,
     store,
     transaction,
     util,
 )
 from .revlogutils import (
     nodemap,
 )


 def new_stream_clone_requirements(default_requirements, streamed_requirements):
     """determine the final set of requirement for a new stream clone

     this method combine the "default" requirements that a new repository would
     use with the constaint we get from the stream clone content. We keep local
     configuration choice when possible.
     """
     requirements = set(default_requirements)
     requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
     requirements.update(streamed_requirements)
     return requirements


 def streamed_requirements(repo):
     """the set of requirement the new clone will have to support

     This is used for advertising the stream options and to generate the actual
     stream content."""
     requiredformats = (
         repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
     )
     return requiredformats


 def canperformstreamclone(pullop, bundle2=False):
     """Whether it is possible to perform a streaming clone as part of pull.

     ``bundle2`` will cause the function to consider stream clone through
     bundle2 and only through bundle2.

     Returns a tuple of (supported, requirements). ``supported`` is True if
     streaming clone is supported and False otherwise. ``requirements`` is
     a set of repo requirements from the remote, or ``None`` if stream clone
     isn't supported.
     """
     repo = pullop.repo
     remote = pullop.remote

     bundle2supported = False
     if pullop.canusebundle2:
         if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
             bundle2supported = True
         # else
         # Server doesn't support bundle2 stream clone or doesn't support
         # the versions we support. Fall back and possibly allow legacy.

     # Ensures legacy code path uses available bundle2.
     if bundle2supported and not bundle2:
         return False, None
     # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
     elif bundle2 and not bundle2supported:
         return False, None

     # Streaming clone only works on empty repositories.
     if len(repo):
         return False, None

     # Streaming clone only works if all data is being requested.
     if pullop.heads:
         return False, None

     streamrequested = pullop.streamclonerequested

     # If we don't have a preference, let the server decide for us. This
     # likely only comes into play in LANs.
     if streamrequested is None:
         # The server can advertise whether to prefer streaming clone.
         streamrequested = remote.capable(b'stream-preferred')

     if not streamrequested:
         return False, None

     # In order for stream clone to work, the client has to support all the
     # requirements advertised by the server.
     #
     # The server advertises its requirements via the "stream" and "streamreqs"
     # capability. "stream" (a value-less capability) is advertised if and only
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 # is advertised and contains a comma-delimited list of requirements.
112 # is advertised and contains a comma-delimited list of requirements.
113 requirements = set()
113 requirements = set()
114 if remote.capable(b'stream'):
114 if remote.capable(b'stream'):
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 else:
116 else:
117 streamreqs = remote.capable(b'streamreqs')
117 streamreqs = remote.capable(b'streamreqs')
118 # This is weird and shouldn't happen with modern servers.
118 # This is weird and shouldn't happen with modern servers.
119 if not streamreqs:
119 if not streamreqs:
120 pullop.repo.ui.warn(
120 pullop.repo.ui.warn(
121 _(
121 _(
122 b'warning: stream clone requested but server has them '
122 b'warning: stream clone requested but server has them '
123 b'disabled\n'
123 b'disabled\n'
124 )
124 )
125 )
125 )
126 return False, None
126 return False, None
127
127
128 streamreqs = set(streamreqs.split(b','))
128 streamreqs = set(streamreqs.split(b','))
129 # Server requires something we don't support. Bail.
129 # Server requires something we don't support. Bail.
130 missingreqs = streamreqs - repo.supported
130 missingreqs = streamreqs - repo.supported
131 if missingreqs:
131 if missingreqs:
132 pullop.repo.ui.warn(
132 pullop.repo.ui.warn(
133 _(
133 _(
134 b'warning: stream clone requested but client is missing '
134 b'warning: stream clone requested but client is missing '
135 b'requirements: %s\n'
135 b'requirements: %s\n'
136 )
136 )
137 % b', '.join(sorted(missingreqs))
137 % b', '.join(sorted(missingreqs))
138 )
138 )
139 pullop.repo.ui.warn(
139 pullop.repo.ui.warn(
140 _(
140 _(
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 b'for more information)\n'
142 b'for more information)\n'
143 )
143 )
144 )
144 )
145 return False, None
145 return False, None
146 requirements = streamreqs
146 requirements = streamreqs
147
147
148 return True, requirements
148 return True, requirements
149
149
150
150
151 def maybeperformlegacystreamclone(pullop):
151 def maybeperformlegacystreamclone(pullop):
152 """Possibly perform a legacy stream clone operation.
152 """Possibly perform a legacy stream clone operation.
153
153
154 Legacy stream clones are performed as part of pull but before all other
154 Legacy stream clones are performed as part of pull but before all other
155 operations.
155 operations.
156
156
157 A legacy stream clone will not be performed if a bundle2 stream clone is
157 A legacy stream clone will not be performed if a bundle2 stream clone is
158 supported.
158 supported.
159 """
159 """
160 from . import localrepo
160 from . import localrepo
161
161
162 supported, requirements = canperformstreamclone(pullop)
162 supported, requirements = canperformstreamclone(pullop)
163
163
164 if not supported:
164 if not supported:
165 return
165 return
166
166
167 repo = pullop.repo
167 repo = pullop.repo
168 remote = pullop.remote
168 remote = pullop.remote
169
169
170 # Save remote branchmap. We will use it later to speed up branchcache
170 # Save remote branchmap. We will use it later to speed up branchcache
171 # creation.
171 # creation.
172 rbranchmap = None
172 rbranchmap = None
173 if remote.capable(b'branchmap'):
173 if remote.capable(b'branchmap'):
174 with remote.commandexecutor() as e:
174 with remote.commandexecutor() as e:
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
176
176
177 repo.ui.status(_(b'streaming all changes\n'))
177 repo.ui.status(_(b'streaming all changes\n'))
178
178
179 with remote.commandexecutor() as e:
179 with remote.commandexecutor() as e:
180 fp = e.callcommand(b'stream_out', {}).result()
180 fp = e.callcommand(b'stream_out', {}).result()
181
181
182 # TODO strictly speaking, this code should all be inside the context
182 # TODO strictly speaking, this code should all be inside the context
183 # manager because the context manager is supposed to ensure all wire state
183 # manager because the context manager is supposed to ensure all wire state
184 # is flushed when exiting. But the legacy peers don't do this, so it
184 # is flushed when exiting. But the legacy peers don't do this, so it
185 # doesn't matter.
185 # doesn't matter.
186 l = fp.readline()
186 l = fp.readline()
187 try:
187 try:
188 resp = int(l)
188 resp = int(l)
189 except ValueError:
189 except ValueError:
190 raise error.ResponseError(
190 raise error.ResponseError(
191 _(b'unexpected response from remote server:'), l
191 _(b'unexpected response from remote server:'), l
192 )
192 )
193 if resp == 1:
193 if resp == 1:
194 raise error.Abort(_(b'operation forbidden by server'))
194 raise error.Abort(_(b'operation forbidden by server'))
195 elif resp == 2:
195 elif resp == 2:
196 raise error.Abort(_(b'locking the remote repository failed'))
196 raise error.Abort(_(b'locking the remote repository failed'))
197 elif resp != 0:
197 elif resp != 0:
198 raise error.Abort(_(b'the server sent an unknown error code'))
198 raise error.Abort(_(b'the server sent an unknown error code'))
199
199
200 l = fp.readline()
200 l = fp.readline()
201 try:
201 try:
202 filecount, bytecount = map(int, l.split(b' ', 1))
202 filecount, bytecount = map(int, l.split(b' ', 1))
203 except (ValueError, TypeError):
203 except (ValueError, TypeError):
204 raise error.ResponseError(
204 raise error.ResponseError(
205 _(b'unexpected response from remote server:'), l
205 _(b'unexpected response from remote server:'), l
206 )
206 )
207
207
208 with repo.lock():
208 with repo.lock():
209 consumev1(repo, fp, filecount, bytecount)
209 consumev1(repo, fp, filecount, bytecount)
210 repo.requirements = new_stream_clone_requirements(
210 repo.requirements = new_stream_clone_requirements(
211 repo.requirements,
211 repo.requirements,
212 requirements,
212 requirements,
213 )
213 )
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 repo.ui, repo.requirements, repo.features
215 repo.ui, repo.requirements, repo.features
216 )
216 )
217 scmutil.writereporequirements(repo)
217 scmutil.writereporequirements(repo)
218 nodemap.post_stream_cleanup(repo)
218 nodemap.post_stream_cleanup(repo)
219
219
220 if rbranchmap:
220 if rbranchmap:
221 repo._branchcaches.replace(repo, rbranchmap)
221 repo._branchcaches.replace(repo, rbranchmap)
222
222
223 repo.invalidate()
223 repo.invalidate()
224
224
225
225
226 def allowservergeneration(repo):
226 def allowservergeneration(repo):
227 """Whether streaming clones are allowed from the server."""
227 """Whether streaming clones are allowed from the server."""
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 return False
229 return False
230
230
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 return False
232 return False
233
233
234 # The way stream clone works makes it impossible to hide secret changesets.
234 # The way stream clone works makes it impossible to hide secret changesets.
235 # So don't allow this by default.
235 # So don't allow this by default.
236 secret = phases.hassecret(repo)
236 secret = phases.hassecret(repo)
237 if secret:
237 if secret:
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239
239
240 return True
240 return True
241
241
242
242
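For reference, the server-side knobs this function consults are plain hgrc options; a minimal example enabling stream clones, including for repositories that contain secret changesets, might look like:

    [server]
    uncompressed = True
    # only needed when the repository contains secret changesets:
    uncompressedallowsecret = True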
243 # This is its own function so extensions can override it.
243 # This is its own function so extensions can override it.
244 def _walkstreamfiles(repo, matcher=None):
244 def _walkstreamfiles(repo, matcher=None):
245 return repo.store.walk(matcher)
245 return repo.store.walk(matcher)
246
246
247
247
248 def generatev1(repo):
248 def generatev1(repo):
249 """Emit content for version 1 of a streaming clone.
249 """Emit content for version 1 of a streaming clone.
250
250
251 This returns a 3-tuple of (file count, byte size, data iterator).
251 This returns a 3-tuple of (file count, byte size, data iterator).
252
252
253 The data iterator consists of N entries, one per file being transferred.
253 The data iterator consists of N entries, one per file being transferred.
254 Each file entry starts as a line with the file name and integer size
254 Each file entry starts as a line with the file name and integer size
255 delimited by a null byte.
255 delimited by a null byte.
256
256
257 The raw file data follows. Following the raw file data is the next file
257 The raw file data follows. Following the raw file data is the next file
258 entry, or EOF.
258 entry, or EOF.
259
259
260 When used on the wire protocol, an additional line indicating protocol
260 When used on the wire protocol, an additional line indicating protocol
261 success will be prepended to the stream. This function is not responsible
261 success will be prepended to the stream. This function is not responsible
262 for adding it.
262 for adding it.
263
263
264 This function will obtain a repository lock to ensure a consistent view of
264 This function will obtain a repository lock to ensure a consistent view of
265 the store is captured. It therefore may raise LockError.
265 the store is captured. It therefore may raise LockError.
266 """
266 """
267 entries = []
267 entries = []
268 total_bytes = 0
268 total_bytes = 0
269 # Get consistent snapshot of repo, lock during scan.
269 # Get consistent snapshot of repo, lock during scan.
270 with repo.lock():
270 with repo.lock():
271 repo.ui.debug(b'scanning\n')
271 repo.ui.debug(b'scanning\n')
272 for entry in _walkstreamfiles(repo):
272 for entry in _walkstreamfiles(repo):
273 for f in entry.files():
273 for f in entry.files():
274 if f.file_size:
274 file_size = f.file_size(repo.store.vfs)
275 entries.append((f.unencoded_path, f.file_size))
275 if file_size:
276 total_bytes += f.file_size
276 entries.append((f.unencoded_path, file_size))
277 total_bytes += file_size
277 _test_sync_point_walk_1(repo)
278 _test_sync_point_walk_1(repo)
278 _test_sync_point_walk_2(repo)
279 _test_sync_point_walk_2(repo)
279
280
280 repo.ui.debug(
281 repo.ui.debug(
281 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
282 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
282 )
283 )
283
284
284 svfs = repo.svfs
285 svfs = repo.svfs
285 debugflag = repo.ui.debugflag
286 debugflag = repo.ui.debugflag
286
287
287 def emitrevlogdata():
288 def emitrevlogdata():
288 for name, size in entries:
289 for name, size in entries:
289 if debugflag:
290 if debugflag:
290 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
291 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
291 # partially encode name over the wire for backwards compat
292 # partially encode name over the wire for backwards compat
292 yield b'%s\0%d\n' % (store.encodedir(name), size)
293 yield b'%s\0%d\n' % (store.encodedir(name), size)
293 # auditing at this stage is both pointless (paths are already
294 # auditing at this stage is both pointless (paths are already
294 # trusted by the local repo) and expensive
295 # trusted by the local repo) and expensive
295 with svfs(name, b'rb', auditpath=False) as fp:
296 with svfs(name, b'rb', auditpath=False) as fp:
296 if size <= 65536:
297 if size <= 65536:
297 yield fp.read(size)
298 yield fp.read(size)
298 else:
299 else:
299 for chunk in util.filechunkiter(fp, limit=size):
300 for chunk in util.filechunkiter(fp, limit=size):
300 yield chunk
301 yield chunk
301
302
302 return len(entries), total_bytes, emitrevlogdata()
303 return len(entries), total_bytes, emitrevlogdata()
303
304
304
305
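As a rough sketch of the entry framing described in the docstring above (the real consumer is ``consumev1()`` below), each entry can be read back like this:

    import io

    def read_v1_entries(fp, filecount):
        """simplified reader for v1 stream entries; illustrative only"""
        for _ in range(filecount):
            header = fp.readline()            # b'<encoded name>\0<size>\n'
            name, size = header.split(b'\0', 1)
            yield name, fp.read(int(size))    # raw file data follows

    # count, total, it = generatev1(repo)
    # for name, data in read_v1_entries(io.BytesIO(b''.join(it)), count):
    #     ...  # `name` is still dir-encoded, see store.decodedir()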
305 def generatev1wireproto(repo):
306 def generatev1wireproto(repo):
306 """Emit content for version 1 of streaming clone suitable for the wire.
307 """Emit content for version 1 of streaming clone suitable for the wire.
307
308
308 This is the data output from ``generatev1()`` with 2 header lines. The
309 This is the data output from ``generatev1()`` with 2 header lines. The
309 first line indicates overall success. The 2nd contains the file count and
310 first line indicates overall success. The 2nd contains the file count and
310 byte size of payload.
311 byte size of payload.
311
312
312 The success line contains "0" for success, "1" for stream generation not
313 The success line contains "0" for success, "1" for stream generation not
313 allowed, and "2" for error locking the repository (possibly indicating
314 allowed, and "2" for error locking the repository (possibly indicating
314 a permissions error for the server process).
315 a permissions error for the server process).
315 """
316 """
316 if not allowservergeneration(repo):
317 if not allowservergeneration(repo):
317 yield b'1\n'
318 yield b'1\n'
318 return
319 return
319
320
320 try:
321 try:
321 filecount, bytecount, it = generatev1(repo)
322 filecount, bytecount, it = generatev1(repo)
322 except error.LockError:
323 except error.LockError:
323 yield b'2\n'
324 yield b'2\n'
324 return
325 return
325
326
326 # Indicates successful response.
327 # Indicates successful response.
327 yield b'0\n'
328 yield b'0\n'
328 yield b'%d %d\n' % (filecount, bytecount)
329 yield b'%d %d\n' % (filecount, bytecount)
329 for chunk in it:
330 for chunk in it:
330 yield chunk
331 yield chunk
331
332
332
333
333 def generatebundlev1(repo, compression=b'UN'):
334 def generatebundlev1(repo, compression=b'UN'):
334 """Emit content for version 1 of a stream clone bundle.
335 """Emit content for version 1 of a stream clone bundle.
335
336
336 The first 4 bytes of the output ("HGS1") denote this as stream clone
337 The first 4 bytes of the output ("HGS1") denote this as stream clone
337 bundle version 1.
338 bundle version 1.
338
339
339 The next 2 bytes indicate the compression type. Only "UN" is currently
340 The next 2 bytes indicate the compression type. Only "UN" is currently
340 supported.
341 supported.
341
342
342 The next 16 bytes are two 64-bit big endian unsigned integers indicating
343 The next 16 bytes are two 64-bit big endian unsigned integers indicating
343 file count and byte count, respectively.
344 file count and byte count, respectively.
344
345
345 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
346 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
346 of the requirements string, including a trailing \0. The following N bytes
347 of the requirements string, including a trailing \0. The following N bytes
347 are the requirements string, an ASCII string containing a comma-delimited
348 are the requirements string, an ASCII string containing a comma-delimited
348 list of repo requirements that are needed to support the data.
349 list of repo requirements that are needed to support the data.
349
350
350 The remaining content is the output of ``generatev1()`` (which may be
351 The remaining content is the output of ``generatev1()`` (which may be
351 compressed in the future).
352 compressed in the future).
352
353
353 Returns a tuple of (requirements, data generator).
354 Returns a tuple of (requirements, data generator).
354 """
355 """
355 if compression != b'UN':
356 if compression != b'UN':
356 raise ValueError(b'we do not support the compression argument yet')
357 raise ValueError(b'we do not support the compression argument yet')
357
358
358 requirements = streamed_requirements(repo)
359 requirements = streamed_requirements(repo)
359 requires = b','.join(sorted(requirements))
360 requires = b','.join(sorted(requirements))
360
361
361 def gen():
362 def gen():
362 yield b'HGS1'
363 yield b'HGS1'
363 yield compression
364 yield compression
364
365
365 filecount, bytecount, it = generatev1(repo)
366 filecount, bytecount, it = generatev1(repo)
366 repo.ui.status(
367 repo.ui.status(
367 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
368 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
368 )
369 )
369
370
370 yield struct.pack(b'>QQ', filecount, bytecount)
371 yield struct.pack(b'>QQ', filecount, bytecount)
371 yield struct.pack(b'>H', len(requires) + 1)
372 yield struct.pack(b'>H', len(requires) + 1)
372 yield requires + b'\0'
373 yield requires + b'\0'
373
374
374 # This is where we'll add compression in the future.
375 # This is where we'll add compression in the future.
375 assert compression == b'UN'
376 assert compression == b'UN'
376
377
377 progress = repo.ui.makeprogress(
378 progress = repo.ui.makeprogress(
378 _(b'bundle'), total=bytecount, unit=_(b'bytes')
379 _(b'bundle'), total=bytecount, unit=_(b'bytes')
379 )
380 )
380 progress.update(0)
381 progress.update(0)
381
382
382 for chunk in it:
383 for chunk in it:
383 progress.increment(step=len(chunk))
384 progress.increment(step=len(chunk))
384 yield chunk
385 yield chunk
385
386
386 progress.complete()
387 progress.complete()
387
388
388 return requirements, gen()
389 return requirements, gen()
389
390
390
391
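A hedged usage sketch for materializing such a bundle on disk (the file name is arbitrary and assumes an already-open ``repo``):

    requirements, gen = generatebundlev1(repo)
    with open('clone.hg-stream', 'wb') as fh:
        for chunk in gen:      # starts with the HGS1 header described above
            fh.write(chunk)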
391 def consumev1(repo, fp, filecount, bytecount):
392 def consumev1(repo, fp, filecount, bytecount):
392 """Apply the contents from version 1 of a streaming clone file handle.
393 """Apply the contents from version 1 of a streaming clone file handle.
393
394
394 This takes the output from "stream_out" and applies it to the specified
395 This takes the output from "stream_out" and applies it to the specified
395 repository.
396 repository.
396
397
397 Like "stream_out," the status line added by the wire protocol is not
398 Like "stream_out," the status line added by the wire protocol is not
398 handled by this function.
399 handled by this function.
399 """
400 """
400 with repo.lock():
401 with repo.lock():
401 repo.ui.status(
402 repo.ui.status(
402 _(b'%d files to transfer, %s of data\n')
403 _(b'%d files to transfer, %s of data\n')
403 % (filecount, util.bytecount(bytecount))
404 % (filecount, util.bytecount(bytecount))
404 )
405 )
405 progress = repo.ui.makeprogress(
406 progress = repo.ui.makeprogress(
406 _(b'clone'), total=bytecount, unit=_(b'bytes')
407 _(b'clone'), total=bytecount, unit=_(b'bytes')
407 )
408 )
408 progress.update(0)
409 progress.update(0)
409 start = util.timer()
410 start = util.timer()
410
411
411 # TODO: get rid of (potential) inconsistency
412 # TODO: get rid of (potential) inconsistency
412 #
413 #
413 # If transaction is started and any @filecache property is
414 # If transaction is started and any @filecache property is
414 # changed at this point, it causes inconsistency between
415 # changed at this point, it causes inconsistency between
415 # in-memory cached property and streamclone-ed file on the
416 # in-memory cached property and streamclone-ed file on the
416 # disk. Nested transaction prevents transaction scope "clone"
417 # disk. Nested transaction prevents transaction scope "clone"
417 # below from writing in-memory changes out at the end of it,
418 # below from writing in-memory changes out at the end of it,
418 # even though in-memory changes are discarded at the end of it
419 # even though in-memory changes are discarded at the end of it
419 # regardless of transaction nesting.
420 # regardless of transaction nesting.
420 #
421 #
421 # But transaction nesting can't be simply prohibited, because
422 # But transaction nesting can't be simply prohibited, because
422 # nesting occurs also in ordinary case (e.g. enabling
423 # nesting occurs also in ordinary case (e.g. enabling
423 # clonebundles).
424 # clonebundles).
424
425
425 with repo.transaction(b'clone'):
426 with repo.transaction(b'clone'):
426 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
427 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
427 for i in range(filecount):
428 for i in range(filecount):
428 # XXX doesn't support '\n' or '\r' in filenames
429 # XXX doesn't support '\n' or '\r' in filenames
429 l = fp.readline()
430 l = fp.readline()
430 try:
431 try:
431 name, size = l.split(b'\0', 1)
432 name, size = l.split(b'\0', 1)
432 size = int(size)
433 size = int(size)
433 except (ValueError, TypeError):
434 except (ValueError, TypeError):
434 raise error.ResponseError(
435 raise error.ResponseError(
435 _(b'unexpected response from remote server:'), l
436 _(b'unexpected response from remote server:'), l
436 )
437 )
437 if repo.ui.debugflag:
438 if repo.ui.debugflag:
438 repo.ui.debug(
439 repo.ui.debug(
439 b'adding %s (%s)\n' % (name, util.bytecount(size))
440 b'adding %s (%s)\n' % (name, util.bytecount(size))
440 )
441 )
441 # for backwards compat, name was partially encoded
442 # for backwards compat, name was partially encoded
442 path = store.decodedir(name)
443 path = store.decodedir(name)
443 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
444 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
444 for chunk in util.filechunkiter(fp, limit=size):
445 for chunk in util.filechunkiter(fp, limit=size):
445 progress.increment(step=len(chunk))
446 progress.increment(step=len(chunk))
446 ofp.write(chunk)
447 ofp.write(chunk)
447
448
448 # force @filecache properties to be reloaded from
449 # force @filecache properties to be reloaded from
449 # streamclone-ed file at next access
450 # streamclone-ed file at next access
450 repo.invalidate(clearfilecache=True)
451 repo.invalidate(clearfilecache=True)
451
452
452 elapsed = util.timer() - start
453 elapsed = util.timer() - start
453 if elapsed <= 0:
454 if elapsed <= 0:
454 elapsed = 0.001
455 elapsed = 0.001
455 progress.complete()
456 progress.complete()
456 repo.ui.status(
457 repo.ui.status(
457 _(b'transferred %s in %.1f seconds (%s/sec)\n')
458 _(b'transferred %s in %.1f seconds (%s/sec)\n')
458 % (
459 % (
459 util.bytecount(bytecount),
460 util.bytecount(bytecount),
460 elapsed,
461 elapsed,
461 util.bytecount(bytecount / elapsed),
462 util.bytecount(bytecount / elapsed),
462 )
463 )
463 )
464 )
464
465
465
466
466 def readbundle1header(fp):
467 def readbundle1header(fp):
467 compression = fp.read(2)
468 compression = fp.read(2)
468 if compression != b'UN':
469 if compression != b'UN':
469 raise error.Abort(
470 raise error.Abort(
470 _(
471 _(
471 b'only uncompressed stream clone bundles are '
472 b'only uncompressed stream clone bundles are '
472 b'supported; got %s'
473 b'supported; got %s'
473 )
474 )
474 % compression
475 % compression
475 )
476 )
476
477
477 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
478 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
478 requireslen = struct.unpack(b'>H', fp.read(2))[0]
479 requireslen = struct.unpack(b'>H', fp.read(2))[0]
479 requires = fp.read(requireslen)
480 requires = fp.read(requireslen)
480
481
481 if not requires.endswith(b'\0'):
482 if not requires.endswith(b'\0'):
482 raise error.Abort(
483 raise error.Abort(
483 _(
484 _(
484 b'malformed stream clone bundle: '
485 b'malformed stream clone bundle: '
485 b'requirements not properly encoded'
486 b'requirements not properly encoded'
486 )
487 )
487 )
488 )
488
489
489 requirements = set(requires.rstrip(b'\0').split(b','))
490 requirements = set(requires.rstrip(b'\0').split(b','))
490
491
491 return filecount, bytecount, requirements
492 return filecount, bytecount, requirements
492
493
493
494
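To make the header layout concrete, here is a small round-trip sketch that fabricates a header in memory (with illustrative values) and feeds it back to ``readbundle1header()``:

    import io
    import struct

    requires = b'revlogv1,store'
    header = (
        b'UN'                                     # compression marker
        + struct.pack(b'>QQ', 3, 12345)           # file count, byte count
        + struct.pack(b'>H', len(requires) + 1)   # length incl. trailing NUL
        + requires + b'\0'
    )
    filecount, bytecount, reqs = readbundle1header(io.BytesIO(header))
    # filecount == 3, bytecount == 12345, reqs == {b'revlogv1', b'store'}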
494 def applybundlev1(repo, fp):
495 def applybundlev1(repo, fp):
495 """Apply the content from a stream clone bundle version 1.
496 """Apply the content from a stream clone bundle version 1.
496
497
497 We assume the 4 byte header has been read and validated and the file handle
498 We assume the 4 byte header has been read and validated and the file handle
498 is at the 2 byte compression identifier.
499 is at the 2 byte compression identifier.
499 """
500 """
500 if len(repo):
501 if len(repo):
501 raise error.Abort(
502 raise error.Abort(
502 _(b'cannot apply stream clone bundle on non-empty repo')
503 _(b'cannot apply stream clone bundle on non-empty repo')
503 )
504 )
504
505
505 filecount, bytecount, requirements = readbundle1header(fp)
506 filecount, bytecount, requirements = readbundle1header(fp)
506 missingreqs = requirements - repo.supported
507 missingreqs = requirements - repo.supported
507 if missingreqs:
508 if missingreqs:
508 raise error.Abort(
509 raise error.Abort(
509 _(b'unable to apply stream clone: unsupported format: %s')
510 _(b'unable to apply stream clone: unsupported format: %s')
510 % b', '.join(sorted(missingreqs))
511 % b', '.join(sorted(missingreqs))
511 )
512 )
512
513
513 consumev1(repo, fp, filecount, bytecount)
514 consumev1(repo, fp, filecount, bytecount)
514 nodemap.post_stream_cleanup(repo)
515 nodemap.post_stream_cleanup(repo)
515
516
516
517
517 class streamcloneapplier:
518 class streamcloneapplier:
518 """Class to manage applying streaming clone bundles.
519 """Class to manage applying streaming clone bundles.
519
520
520 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
521 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
521 readers to perform bundle type-specific functionality.
522 readers to perform bundle type-specific functionality.
522 """
523 """
523
524
524 def __init__(self, fh):
525 def __init__(self, fh):
525 self._fh = fh
526 self._fh = fh
526
527
527 def apply(self, repo):
528 def apply(self, repo):
528 return applybundlev1(repo, self._fh)
529 return applybundlev1(repo, self._fh)
529
530
530
531
531 # type of file to stream
532 # type of file to stream
532 _fileappend = 0 # append only file
533 _fileappend = 0 # append only file
533 _filefull = 1 # full snapshot file
534 _filefull = 1 # full snapshot file
534
535
535 # Source of the file
536 # Source of the file
536 _srcstore = b's' # store (svfs)
537 _srcstore = b's' # store (svfs)
537 _srccache = b'c' # cache (cache)
538 _srccache = b'c' # cache (cache)
538
539
539 # This is its own function so extensions can override it.
540 # This is its own function so extensions can override it.
540 def _walkstreamfullstorefiles(repo):
541 def _walkstreamfullstorefiles(repo):
541 """list snapshot file from the store"""
542 """list snapshot file from the store"""
542 fnames = []
543 fnames = []
543 if not repo.publishing():
544 if not repo.publishing():
544 fnames.append(b'phaseroots')
545 fnames.append(b'phaseroots')
545 return fnames
546 return fnames
546
547
547
548
548 def _filterfull(entry, copy, vfsmap):
549 def _filterfull(entry, copy, vfsmap):
549 """actually copy the snapshot files"""
550 """actually copy the snapshot files"""
550 src, name, ftype, data = entry
551 src, name, ftype, data = entry
551 if ftype != _filefull:
552 if ftype != _filefull:
552 return entry
553 return entry
553 return (src, name, ftype, copy(vfsmap[src].join(name)))
554 return (src, name, ftype, copy(vfsmap[src].join(name)))
554
555
555
556
556 @contextlib.contextmanager
557 @contextlib.contextmanager
557 def maketempcopies():
558 def maketempcopies():
558 """return a function to temporary copy file"""
559 """return a function to temporary copy file"""
559
560
560 files = []
561 files = []
561 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
562 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
562 try:
563 try:
563
564
564 def copy(src):
565 def copy(src):
565 fd, dst = pycompat.mkstemp(
566 fd, dst = pycompat.mkstemp(
566 prefix=os.path.basename(src), dir=dst_dir
567 prefix=os.path.basename(src), dir=dst_dir
567 )
568 )
568 os.close(fd)
569 os.close(fd)
569 files.append(dst)
570 files.append(dst)
570 util.copyfiles(src, dst, hardlink=True)
571 util.copyfiles(src, dst, hardlink=True)
571 return dst
572 return dst
572
573
573 yield copy
574 yield copy
574 finally:
575 finally:
575 for tmp in files:
576 for tmp in files:
576 util.tryunlink(tmp)
577 util.tryunlink(tmp)
577 util.tryrmdir(dst_dir)
578 util.tryrmdir(dst_dir)
578
579
579
580
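Usage sketch: the context manager yields a ``copy`` callable and removes every temporary copy (and the temp directory) on exit; ``some_path`` is a placeholder:

    with maketempcopies() as copy:
        tmp = copy(some_path)          # hardlink-backed snapshot in a temp dir
        with open(tmp, 'rb') as fh:    # read the frozen copy while the
            data = fh.read()           # original may keep changing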
580 def _makemap(repo):
581 def _makemap(repo):
581 """make a (src -> vfs) map for the repo"""
582 """make a (src -> vfs) map for the repo"""
582 vfsmap = {
583 vfsmap = {
583 _srcstore: repo.svfs,
584 _srcstore: repo.svfs,
584 _srccache: repo.cachevfs,
585 _srccache: repo.cachevfs,
585 }
586 }
586 # we keep repo.vfs out of the map on purpose, there are too many dangers there
587 # we keep repo.vfs out of the map on purpose, there are too many dangers there
587 # (eg: .hg/hgrc)
588 # (eg: .hg/hgrc)
588 assert repo.vfs not in vfsmap.values()
589 assert repo.vfs not in vfsmap.values()
589
590
590 return vfsmap
591 return vfsmap
591
592
592
593
593 def _emit2(repo, entries, totalfilesize):
594 def _emit2(repo, entries, totalfilesize):
594 """actually emit the stream bundle"""
595 """actually emit the stream bundle"""
595 vfsmap = _makemap(repo)
596 vfsmap = _makemap(repo)
596 # we keep repo.vfs out of the map on purpose, there are too many dangers there
597 # we keep repo.vfs out of the map on purpose, there are too many dangers there
597 # (eg: .hg/hgrc),
598 # (eg: .hg/hgrc),
598 #
599 #
599 # this assert is duplicated (from _makemap) as an author might think this is
600 # this assert is duplicated (from _makemap) as an author might think this is
600 # fine, while this is really not fine.
601 # fine, while this is really not fine.
601 if repo.vfs in vfsmap.values():
602 if repo.vfs in vfsmap.values():
602 raise error.ProgrammingError(
603 raise error.ProgrammingError(
603 b'repo.vfs must not be added to vfsmap for security reasons'
604 b'repo.vfs must not be added to vfsmap for security reasons'
604 )
605 )
605
606
606 progress = repo.ui.makeprogress(
607 progress = repo.ui.makeprogress(
607 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
608 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
608 )
609 )
609 progress.update(0)
610 progress.update(0)
610 with maketempcopies() as copy, progress:
611 with maketempcopies() as copy, progress:
611 # copy is delayed until we are in the try
612 # copy is delayed until we are in the try
612 entries = [_filterfull(e, copy, vfsmap) for e in entries]
613 entries = [_filterfull(e, copy, vfsmap) for e in entries]
613 yield None # this releases the lock on the repository
614 yield None # this releases the lock on the repository
614 totalbytecount = 0
615 totalbytecount = 0
615
616
616 for src, name, ftype, data in entries:
617 for src, name, ftype, data in entries:
617 vfs = vfsmap[src]
618 vfs = vfsmap[src]
618 yield src
619 yield src
619 yield util.uvarintencode(len(name))
620 yield util.uvarintencode(len(name))
620 if ftype == _fileappend:
621 if ftype == _fileappend:
621 fp = vfs(name)
622 fp = vfs(name)
622 size = data
623 size = data
623 elif ftype == _filefull:
624 elif ftype == _filefull:
624 fp = open(data, b'rb')
625 fp = open(data, b'rb')
625 size = util.fstat(fp).st_size
626 size = util.fstat(fp).st_size
626 bytecount = 0
627 bytecount = 0
627 try:
628 try:
628 yield util.uvarintencode(size)
629 yield util.uvarintencode(size)
629 yield name
630 yield name
630 if size <= 65536:
631 if size <= 65536:
631 chunks = (fp.read(size),)
632 chunks = (fp.read(size),)
632 else:
633 else:
633 chunks = util.filechunkiter(fp, limit=size)
634 chunks = util.filechunkiter(fp, limit=size)
634 for chunk in chunks:
635 for chunk in chunks:
635 bytecount += len(chunk)
636 bytecount += len(chunk)
636 totalbytecount += len(chunk)
637 totalbytecount += len(chunk)
637 progress.update(totalbytecount)
638 progress.update(totalbytecount)
638 yield chunk
639 yield chunk
639 if bytecount != size:
640 if bytecount != size:
640 # Would most likely be caused by a race due to `hg strip` or
641 # Would most likely be caused by a race due to `hg strip` or
641 # a revlog split
642 # a revlog split
642 raise error.Abort(
643 raise error.Abort(
643 _(
644 _(
644 b'clone could only read %d bytes from %s, but '
645 b'clone could only read %d bytes from %s, but '
645 b'expected %d bytes'
646 b'expected %d bytes'
646 )
647 )
647 % (bytecount, name, size)
648 % (bytecount, name, size)
648 )
649 )
649 finally:
650 finally:
650 fp.close()
651 fp.close()
651
652
652
653
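The ``util.uvarintencode``/``util.uvarintdecodestream`` pair used for the framing above follows the usual protobuf-style base-128 varint scheme; an equivalent standalone sketch, under that assumption:

    def uvarint_encode(value):
        """little-endian base-128 varint; high bit marks continuation"""
        out = bytearray()
        while True:
            byte = value & 0x7F
            value >>= 7
            if value:
                out.append(byte | 0x80)   # more bytes follow
            else:
                out.append(byte)
                return bytes(out)

    def uvarint_decode(fp):
        """decode one varint from a binary file-like object"""
        result = shift = 0
        while True:
            byte = fp.read(1)[0]
            result |= (byte & 0x7F) << shift
            if not byte & 0x80:
                return result
            shift += 7

    # uvarint_encode(300) == b'\xac\x02', and it round-trips through
    # uvarint_decode(io.BytesIO(b'\xac\x02'))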
653 def _test_sync_point_walk_1(repo):
654 def _test_sync_point_walk_1(repo):
654 """a function for synchronisation during tests"""
655 """a function for synchronisation during tests"""
655
656
656
657
657 def _test_sync_point_walk_2(repo):
658 def _test_sync_point_walk_2(repo):
658 """a function for synchronisation during tests"""
659 """a function for synchronisation during tests"""
659
660
660
661
661 def _v2_walk(repo, includes, excludes, includeobsmarkers):
662 def _v2_walk(repo, includes, excludes, includeobsmarkers):
662 """emit a seris of files information useful to clone a repo
663 """emit a seris of files information useful to clone a repo
663
664
664 return (entries, totalfilesize)
665 return (entries, totalfilesize)
665
666
666 entries is a list of tuple (vfs-key, file-path, file-type, size)
667 entries is a list of tuple (vfs-key, file-path, file-type, size)
667
668
668 - `vfs-key`: a key to the right vfs to write the file (see _makemap)
669 - `vfs-key`: a key to the right vfs to write the file (see _makemap)
669 - `name`: file path of the file to copy (to be fed to the vfs)
670 - `name`: file path of the file to copy (to be fed to the vfs)
670 - `file-type`: does this file need to be copied with the source lock?
671 - `file-type`: does this file need to be copied with the source lock?
671 - `size`: the size of the file (or None)
672 - `size`: the size of the file (or None)
672 """
673 """
673 assert repo._currentlock(repo._lockref) is not None
674 assert repo._currentlock(repo._lockref) is not None
674 entries = []
675 entries = []
675 totalfilesize = 0
676 totalfilesize = 0
676
677
677 matcher = None
678 matcher = None
678 if includes or excludes:
679 if includes or excludes:
679 matcher = narrowspec.match(repo.root, includes, excludes)
680 matcher = narrowspec.match(repo.root, includes, excludes)
680
681
681 for entry in _walkstreamfiles(repo, matcher):
682 for entry in _walkstreamfiles(repo, matcher):
682 for f in entry.files():
683 for f in entry.files():
683 if f.file_size:
684 file_size = f.file_size(repo.store.vfs)
685 if file_size:
684 ft = _fileappend
686 ft = _fileappend
685 if f.is_volatile:
687 if f.is_volatile:
686 ft = _filefull
688 ft = _filefull
687 entries.append((_srcstore, f.unencoded_path, ft, f.file_size))
689 entries.append((_srcstore, f.unencoded_path, ft, file_size))
688 totalfilesize += f.file_size
690 totalfilesize += file_size
689 for name in _walkstreamfullstorefiles(repo):
691 for name in _walkstreamfullstorefiles(repo):
690 if repo.svfs.exists(name):
692 if repo.svfs.exists(name):
691 totalfilesize += repo.svfs.lstat(name).st_size
693 totalfilesize += repo.svfs.lstat(name).st_size
692 entries.append((_srcstore, name, _filefull, None))
694 entries.append((_srcstore, name, _filefull, None))
693 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
695 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
694 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
696 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
695 entries.append((_srcstore, b'obsstore', _filefull, None))
697 entries.append((_srcstore, b'obsstore', _filefull, None))
696 for name in cacheutil.cachetocopy(repo):
698 for name in cacheutil.cachetocopy(repo):
697 if repo.cachevfs.exists(name):
699 if repo.cachevfs.exists(name):
698 totalfilesize += repo.cachevfs.lstat(name).st_size
700 totalfilesize += repo.cachevfs.lstat(name).st_size
699 entries.append((_srccache, name, _filefull, None))
701 entries.append((_srccache, name, _filefull, None))
700 return entries, totalfilesize
702 return entries, totalfilesize
701
703
702
704
703 def generatev2(repo, includes, excludes, includeobsmarkers):
705 def generatev2(repo, includes, excludes, includeobsmarkers):
704 """Emit content for version 2 of a streaming clone.
706 """Emit content for version 2 of a streaming clone.
705
707
706 the data stream consists of the following entries:
708 the data stream consists of the following entries:
707 1) A char representing the file destination (eg: store or cache)
709 1) A char representing the file destination (eg: store or cache)
708 2) A varint containing the length of the filename
710 2) A varint containing the length of the filename
709 3) A varint containing the length of file data
711 3) A varint containing the length of file data
710 4) N bytes containing the filename (the internal, store-agnostic form)
712 4) N bytes containing the filename (the internal, store-agnostic form)
711 5) N bytes containing the file data
713 5) N bytes containing the file data
712
714
713 Returns a 3-tuple of (file count, file size, data iterator).
715 Returns a 3-tuple of (file count, file size, data iterator).
714 """
716 """
715
717
716 with repo.lock():
718 with repo.lock():
717
719
718 repo.ui.debug(b'scanning\n')
720 repo.ui.debug(b'scanning\n')
719
721
720 entries, totalfilesize = _v2_walk(
722 entries, totalfilesize = _v2_walk(
721 repo,
723 repo,
722 includes=includes,
724 includes=includes,
723 excludes=excludes,
725 excludes=excludes,
724 includeobsmarkers=includeobsmarkers,
726 includeobsmarkers=includeobsmarkers,
725 )
727 )
726
728
727 chunks = _emit2(repo, entries, totalfilesize)
729 chunks = _emit2(repo, entries, totalfilesize)
728 first = next(chunks)
730 first = next(chunks)
729 assert first is None
731 assert first is None
730 _test_sync_point_walk_1(repo)
732 _test_sync_point_walk_1(repo)
731 _test_sync_point_walk_2(repo)
733 _test_sync_point_walk_2(repo)
732
734
733 return len(entries), totalfilesize, chunks
735 return len(entries), totalfilesize, chunks
734
736
735
737
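To make the five-part framing above concrete, a single entry could be assembled as follows; the name and payload are hypothetical, and this mirrors what ``_emit2()`` streams out:

    from mercurial import util

    def encode_v2_entry(dest, name, data):
        """frame one stream-v2 entry per the docstring of generatev2"""
        return (
            dest                              # 1) destination char, e.g. b's'
            + util.uvarintencode(len(name))   # 2) filename length
            + util.uvarintencode(len(data))   # 3) data length
            + name                            # 4) filename
            + data                            # 5) file data
        )

    # encode_v2_entry(b's', b'data/foo.i', b'\x00' * 64)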
736 @contextlib.contextmanager
738 @contextlib.contextmanager
737 def nested(*ctxs):
739 def nested(*ctxs):
738 this = ctxs[0]
740 this = ctxs[0]
739 rest = ctxs[1:]
741 rest = ctxs[1:]
740 with this:
742 with this:
741 if rest:
743 if rest:
742 with nested(*rest):
744 with nested(*rest):
743 yield
745 yield
744 else:
746 else:
745 yield
747 yield
746
748
747
749
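Usage sketch: ``nested()`` enters the given context managers left to right and exits them in reverse order, like a variadic ``with`` statement (``a``, ``b``, ``c`` are placeholder context managers):

    with nested(a, b, c):
        do_something()
    # roughly equivalent to:
    #   with a:
    #       with b:
    #           with c:
    #               do_something()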
748 def consumev2(repo, fp, filecount, filesize):
750 def consumev2(repo, fp, filecount, filesize):
749 """Apply the contents from a version 2 streaming clone.
751 """Apply the contents from a version 2 streaming clone.
750
752
751 Data is read from an object that only needs to provide a ``read(size)``
753 Data is read from an object that only needs to provide a ``read(size)``
752 method.
754 method.
753 """
755 """
754 with repo.lock():
756 with repo.lock():
755 repo.ui.status(
757 repo.ui.status(
756 _(b'%d files to transfer, %s of data\n')
758 _(b'%d files to transfer, %s of data\n')
757 % (filecount, util.bytecount(filesize))
759 % (filecount, util.bytecount(filesize))
758 )
760 )
759
761
760 start = util.timer()
762 start = util.timer()
761 progress = repo.ui.makeprogress(
763 progress = repo.ui.makeprogress(
762 _(b'clone'), total=filesize, unit=_(b'bytes')
764 _(b'clone'), total=filesize, unit=_(b'bytes')
763 )
765 )
764 progress.update(0)
766 progress.update(0)
765
767
766 vfsmap = _makemap(repo)
768 vfsmap = _makemap(repo)
767 # we keep repo.vfs out of the map on purpose, there are too many dangers
769 # we keep repo.vfs out of the map on purpose, there are too many dangers
768 # there (eg: .hg/hgrc),
770 # there (eg: .hg/hgrc),
769 #
771 #
770 # this assert is duplicated (from _makemap) as an author might think this
772 # this assert is duplicated (from _makemap) as an author might think this
771 # is fine, while this is really not fine.
773 # is fine, while this is really not fine.
772 if repo.vfs in vfsmap.values():
774 if repo.vfs in vfsmap.values():
773 raise error.ProgrammingError(
775 raise error.ProgrammingError(
774 b'repo.vfs must not be added to vfsmap for security reasons'
776 b'repo.vfs must not be added to vfsmap for security reasons'
775 )
777 )
776
778
777 with repo.transaction(b'clone'):
779 with repo.transaction(b'clone'):
778 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
780 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
779 with nested(*ctxs):
781 with nested(*ctxs):
780 for i in range(filecount):
782 for i in range(filecount):
781 src = util.readexactly(fp, 1)
783 src = util.readexactly(fp, 1)
782 vfs = vfsmap[src]
784 vfs = vfsmap[src]
783 namelen = util.uvarintdecodestream(fp)
785 namelen = util.uvarintdecodestream(fp)
784 datalen = util.uvarintdecodestream(fp)
786 datalen = util.uvarintdecodestream(fp)
785
787
786 name = util.readexactly(fp, namelen)
788 name = util.readexactly(fp, namelen)
787
789
788 if repo.ui.debugflag:
790 if repo.ui.debugflag:
789 repo.ui.debug(
791 repo.ui.debug(
790 b'adding [%s] %s (%s)\n'
792 b'adding [%s] %s (%s)\n'
791 % (src, name, util.bytecount(datalen))
793 % (src, name, util.bytecount(datalen))
792 )
794 )
793
795
794 with vfs(name, b'w') as ofp:
796 with vfs(name, b'w') as ofp:
795 for chunk in util.filechunkiter(fp, limit=datalen):
797 for chunk in util.filechunkiter(fp, limit=datalen):
796 progress.increment(step=len(chunk))
798 progress.increment(step=len(chunk))
797 ofp.write(chunk)
799 ofp.write(chunk)
798
800
799 # force @filecache properties to be reloaded from
801 # force @filecache properties to be reloaded from
800 # streamclone-ed file at next access
802 # streamclone-ed file at next access
801 repo.invalidate(clearfilecache=True)
803 repo.invalidate(clearfilecache=True)
802
804
803 elapsed = util.timer() - start
805 elapsed = util.timer() - start
804 if elapsed <= 0:
806 if elapsed <= 0:
805 elapsed = 0.001
807 elapsed = 0.001
806 repo.ui.status(
808 repo.ui.status(
807 _(b'transferred %s in %.1f seconds (%s/sec)\n')
809 _(b'transferred %s in %.1f seconds (%s/sec)\n')
808 % (
810 % (
809 util.bytecount(progress.pos),
811 util.bytecount(progress.pos),
810 elapsed,
812 elapsed,
811 util.bytecount(progress.pos / elapsed),
813 util.bytecount(progress.pos / elapsed),
812 )
814 )
813 )
815 )
814 progress.complete()
816 progress.complete()
815
817
816
818
817 def applybundlev2(repo, fp, filecount, filesize, requirements):
819 def applybundlev2(repo, fp, filecount, filesize, requirements):
818 from . import localrepo
820 from . import localrepo
819
821
820 missingreqs = [r for r in requirements if r not in repo.supported]
822 missingreqs = [r for r in requirements if r not in repo.supported]
821 if missingreqs:
823 if missingreqs:
822 raise error.Abort(
824 raise error.Abort(
823 _(b'unable to apply stream clone: unsupported format: %s')
825 _(b'unable to apply stream clone: unsupported format: %s')
824 % b', '.join(sorted(missingreqs))
826 % b', '.join(sorted(missingreqs))
825 )
827 )
826
828
827 consumev2(repo, fp, filecount, filesize)
829 consumev2(repo, fp, filecount, filesize)
828
830
829 repo.requirements = new_stream_clone_requirements(
831 repo.requirements = new_stream_clone_requirements(
830 repo.requirements,
832 repo.requirements,
831 requirements,
833 requirements,
832 )
834 )
833 repo.svfs.options = localrepo.resolvestorevfsoptions(
835 repo.svfs.options = localrepo.resolvestorevfsoptions(
834 repo.ui, repo.requirements, repo.features
836 repo.ui, repo.requirements, repo.features
835 )
837 )
836 scmutil.writereporequirements(repo)
838 scmutil.writereporequirements(repo)
837 nodemap.post_stream_cleanup(repo)
839 nodemap.post_stream_cleanup(repo)
838
840
839
841
840 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
842 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
841 hardlink = [True]
843 hardlink = [True]
842
844
843 def copy_used():
845 def copy_used():
844 hardlink[0] = False
846 hardlink[0] = False
845 progress.topic = _(b'copying')
847 progress.topic = _(b'copying')
846
848
847 for k, path, size in entries:
849 for k, path, size in entries:
848 src_vfs = src_vfs_map[k]
850 src_vfs = src_vfs_map[k]
849 dst_vfs = dst_vfs_map[k]
851 dst_vfs = dst_vfs_map[k]
850 src_path = src_vfs.join(path)
852 src_path = src_vfs.join(path)
851 dst_path = dst_vfs.join(path)
853 dst_path = dst_vfs.join(path)
852 # We cannot use dirname and makedirs of dst_vfs here because the store
854 # We cannot use dirname and makedirs of dst_vfs here because the store
853 # encoding confuses them. See issue 6581 for details.
855 # encoding confuses them. See issue 6581 for details.
854 dirname = os.path.dirname(dst_path)
856 dirname = os.path.dirname(dst_path)
855 if not os.path.exists(dirname):
857 if not os.path.exists(dirname):
856 util.makedirs(dirname)
858 util.makedirs(dirname)
857 dst_vfs.register_file(path)
859 dst_vfs.register_file(path)
858 # XXX we could use the #nb_bytes argument.
860 # XXX we could use the #nb_bytes argument.
859 util.copyfile(
861 util.copyfile(
860 src_path,
862 src_path,
861 dst_path,
863 dst_path,
862 hardlink=hardlink[0],
864 hardlink=hardlink[0],
863 no_hardlink_cb=copy_used,
865 no_hardlink_cb=copy_used,
864 check_fs_hardlink=False,
866 check_fs_hardlink=False,
865 )
867 )
866 progress.increment()
868 progress.increment()
867 return hardlink[0]
869 return hardlink[0]
868
870
869
871
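The hardlink-then-copy fallback above can be sketched with the standard library alone; this is a simplified stand-in for ``util.copyfile`` with its ``no_hardlink_cb``, not the actual implementation:

    import os
    import shutil

    class LinkOrCopy:
        """try hardlinks first; after the first failure, copy everything"""

        def __init__(self):
            self.hardlink = True

        def __call__(self, src, dst):
            if self.hardlink:
                try:
                    os.link(src, dst)         # same-filesystem hardlink
                    return
                except OSError:
                    self.hardlink = False     # e.g. cross-device; stop trying
            shutil.copy2(src, dst)            # plain copy fallback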
870 def local_copy(src_repo, dest_repo):
872 def local_copy(src_repo, dest_repo):
871 """copy all content from one local repository to another
873 """copy all content from one local repository to another
872
874
873 This is useful for local clone"""
875 This is useful for local clone"""
874 src_store_requirements = {
876 src_store_requirements = {
875 r
877 r
876 for r in src_repo.requirements
878 for r in src_repo.requirements
877 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
879 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
878 }
880 }
879 dest_store_requirements = {
881 dest_store_requirements = {
880 r
882 r
881 for r in dest_repo.requirements
883 for r in dest_repo.requirements
882 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
884 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
883 }
885 }
884 assert src_store_requirements == dest_store_requirements
886 assert src_store_requirements == dest_store_requirements
885
887
886 with dest_repo.lock():
888 with dest_repo.lock():
887 with src_repo.lock():
889 with src_repo.lock():
888
890
889 # bookmarks are not integrated into the streaming as they might use the
891 # bookmarks are not integrated into the streaming as they might use the
890 # `repo.vfs`, and there is too much sensitive data accessible
892 # `repo.vfs`, and there is too much sensitive data accessible
891 # through `repo.vfs` to expose it to streaming clone.
893 # through `repo.vfs` to expose it to streaming clone.
892 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
894 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
893 srcbookmarks = src_book_vfs.join(b'bookmarks')
895 srcbookmarks = src_book_vfs.join(b'bookmarks')
894 bm_count = 0
896 bm_count = 0
895 if os.path.exists(srcbookmarks):
897 if os.path.exists(srcbookmarks):
896 bm_count = 1
898 bm_count = 1
897
899
898 entries, totalfilesize = _v2_walk(
900 entries, totalfilesize = _v2_walk(
899 src_repo,
901 src_repo,
900 includes=None,
902 includes=None,
901 excludes=None,
903 excludes=None,
902 includeobsmarkers=True,
904 includeobsmarkers=True,
903 )
905 )
904 src_vfs_map = _makemap(src_repo)
906 src_vfs_map = _makemap(src_repo)
905 dest_vfs_map = _makemap(dest_repo)
907 dest_vfs_map = _makemap(dest_repo)
906 progress = src_repo.ui.makeprogress(
908 progress = src_repo.ui.makeprogress(
907 topic=_(b'linking'),
909 topic=_(b'linking'),
908 total=len(entries) + bm_count,
910 total=len(entries) + bm_count,
909 unit=_(b'files'),
911 unit=_(b'files'),
910 )
912 )
911 # copy files
913 # copy files
912 #
914 #
913 # We could copy the full file while the source repository is locked
915 # We could copy the full file while the source repository is locked
914 # and the other one without the lock. However, in the linking case,
916 # and the other one without the lock. However, in the linking case,
915 # this would also require checks that nobody is appending any data
917 # this would also require checks that nobody is appending any data
916 # to the files while we do the clone, so this is not done yet. We
918 # to the files while we do the clone, so this is not done yet. We
917 # could do this blindly when copying files.
919 # could do this blindly when copying files.
918 files = ((k, path, size) for k, path, ftype, size in entries)
920 files = ((k, path, size) for k, path, ftype, size in entries)
919 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
921 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
920
922
921 # copy bookmarks over
923 # copy bookmarks over
922 if bm_count:
924 if bm_count:
923 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
925 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
924 dstbookmarks = dst_book_vfs.join(b'bookmarks')
926 dstbookmarks = dst_book_vfs.join(b'bookmarks')
925 util.copyfile(srcbookmarks, dstbookmarks)
927 util.copyfile(srcbookmarks, dstbookmarks)
926 progress.complete()
928 progress.complete()
927 if hardlink:
929 if hardlink:
928 msg = b'linked %d files\n'
930 msg = b'linked %d files\n'
929 else:
931 else:
930 msg = b'copied %d files\n'
932 msg = b'copied %d files\n'
931 src_repo.ui.debug(msg % (len(entries) + bm_count))
933 src_repo.ui.debug(msg % (len(entries) + bm_count))
932
934
933 with dest_repo.transaction(b"localclone") as tr:
935 with dest_repo.transaction(b"localclone") as tr:
934 dest_repo.store.write(tr)
936 dest_repo.store.write(tr)
935
937
936 # clean up transaction files as they do not make sense
938 # clean up transaction files as they do not make sense
937 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
939 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
@@ -1,627 +1,627 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10
10
11 from .i18n import _
11 from .i18n import _
12 from .node import short
12 from .node import short
13 from .utils import stringutil
13 from .utils import stringutil
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements,
18 requirements,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
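For example, a path mangled by an old ``convert`` run collapses back to single slashes:

    assert _normpath(b'foo//bar///baz') == b'foo/bar/baz'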
41 HINT_FNCACHE = _(
41 HINT_FNCACHE = _(
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 )
43 )
44
44
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 b"parent-directory manifest refers to unknown revision %s"
46 b"parent-directory manifest refers to unknown revision %s"
47 )
47 )
48
48
49 WARN_UNKNOWN_COPY_SOURCE = _(
49 WARN_UNKNOWN_COPY_SOURCE = _(
50 b"warning: copy source of '%s' not in parents of %s"
50 b"warning: copy source of '%s' not in parents of %s"
51 )
51 )
52
52
53 WARN_NULLID_COPY_SOURCE = _(
53 WARN_NULLID_COPY_SOURCE = _(
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
54 b"warning: %s@%s: copy source revision is nullid %s:%s\n"
55 )
55 )
56
56
57
57
58 class verifier:
58 class verifier:
59 def __init__(self, repo, level=None):
59 def __init__(self, repo, level=None):
60 self.repo = repo.unfiltered()
60 self.repo = repo.unfiltered()
61 self.ui = repo.ui
61 self.ui = repo.ui
62 self.match = repo.narrowmatch()
62 self.match = repo.narrowmatch()
63 if level is None:
63 if level is None:
64 level = VERIFY_DEFAULT
64 level = VERIFY_DEFAULT
65 self._level = level
65 self._level = level
66 self.badrevs = set()
66 self.badrevs = set()
67 self.errors = 0
67 self.errors = 0
68 self.warnings = 0
68 self.warnings = 0
69 self.havecl = len(repo.changelog) > 0
69 self.havecl = len(repo.changelog) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
70 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
71 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
72 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
73 self.refersmf = False
73 self.refersmf = False
74 self.fncachewarned = False
74 self.fncachewarned = False
75 # developer config: verify.skipflags
75 # developer config: verify.skipflags
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
76 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
77 self.warnorphanstorefiles = True
77 self.warnorphanstorefiles = True
78
78
79 def _warn(self, msg):
79 def _warn(self, msg):
80 """record a "warning" level issue"""
80 """record a "warning" level issue"""
81 self.ui.warn(msg + b"\n")
81 self.ui.warn(msg + b"\n")
82 self.warnings += 1
82 self.warnings += 1
83
83
84 def _err(self, linkrev, msg, filename=None):
84 def _err(self, linkrev, msg, filename=None):
85 """record a "error" level issue"""
85 """record a "error" level issue"""
86 if linkrev is not None:
86 if linkrev is not None:
87 self.badrevs.add(linkrev)
87 self.badrevs.add(linkrev)
88 linkrev = b"%d" % linkrev
88 linkrev = b"%d" % linkrev
89 else:
89 else:
90 linkrev = b'?'
90 linkrev = b'?'
91 msg = b"%s: %s" % (linkrev, msg)
91 msg = b"%s: %s" % (linkrev, msg)
92 if filename:
92 if filename:
93 msg = b"%s@%s" % (filename, msg)
93 msg = b"%s@%s" % (filename, msg)
94 self.ui.warn(b" " + msg + b"\n")
94 self.ui.warn(b" " + msg + b"\n")
95 self.errors += 1
95 self.errors += 1
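    # e.g. _err(5, b"broken revlog!", filename=b"foo.txt") prints
    # b" foo.txt@5: broken revlog!\n" and bumps the error counter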

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level properties of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj: the source revlog
        - i: the revision number
        - node: the revision node id
        - seen: nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f: string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        filtered = []
                        for candidate in linkrevs:
                            if self.lrugetctx(candidate)[f].filenode() == node:
                                filtered.append(candidate)
                        linkrevs = filtered
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are
        found.

        return 1 if any errors have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        if self.errors:
            ui.warn(_(b"not checking dirstate because of previous errors\n"))
            dirstate_errors = 0
        else:
            dirstate_errors = self._verify_dirstate()

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
            if self.fncachewarned:
                ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            if dirstate_errors:
                ui.warn(
                    _(b"dirstate inconsistent with current parent's manifest\n")
                )
                ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
            return 1
        return 0
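
    # on a healthy repository the verification output looks roughly like:
    #   checking changesets
    #   checking manifests
    #   crosschecking files in changesets and manifests
    #   checking files
    #   checking dirstate
    #   checked 3 changesets with 5 changes to 4 files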

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revision),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuple:
        - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contain matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
        - dir: a subdirectory to check (for tree manifest repo)
        - storefiles: set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectories exist in the sub-directory manifest
        * each manifest entry has a file path
        * each manifest node referred to in mflinkrevs exists in the manifest log

        If tree manifest is in use and a matcher is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every file in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
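            # `storefiles` holds every store file not yet claimed by a
            # revlog; entries are removed as each revlog accounts for its
            # files and whatever remains is reported as orphaned at the end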
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifests. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filenames are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entries in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            undecodable = []
            for entry in repo.store.datafiles(undecodable=undecodable):
                for file_ in entry.files():
                    f = file_.unencoded_path
-                    size = file_.file_size
+                    size = file_.file_size(repo.store.vfs)
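                    # file_size takes the store vfs so the size can be
                    # fetched (stat()ed) on demand, only for entries that
                    # are actually inspected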
                    if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                        storefiles.add(_normpath(f))
                        subdirs.add(os.path.dirname(f))
            for f in undecodable:
                self._err(None, _(b"cannot decode filename '%s'") % f)
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in subdirnodes.items():
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in subdirfilenodes.items():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        undecodable = []
        for entry in repo.store.datafiles(undecodable=undecodable):
            for file_ in entry.files():
-                size = file_.file_size
+                size = file_.file_size(repo.store.vfs)
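                # same on-demand file_size(vfs) lookup as in _verifymanifest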
                f = file_.unencoded_path
                if (size > 0 or not revlogv1) and f.startswith(b'data/'):
                    storefiles.add(_normpath(f))
        for f in undecodable:
            self._err(None, _(b"cannot decode filename '%s'") % f)

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        msg = _(b" warning: revlog '%s' not in fncache!")
                        self._warn(msg % ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        linkrev_msg = linkrev if linkrev is not None else lr
                        self._err(linkrev_msg, problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            m = _(b"empty or missing copy source revlog %s:%s")
                            self._err(lr, m % (rp[0], short(rp[1])), f)
                        elif rp[1] == self.repo.nullid:
                            msg = WARN_NULLID_COPY_SOURCE
                            msg %= (f, lr, rp[0], short(rp[1]))
                            ui.note(msg)
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
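            # any nodes still listed in filenodes[f] were announced by a
            # manifest but never seen in the filelog: report them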
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].items()]
                for lr, node in sorted(fns):
                    msg = _(b"manifest refers to unknown revision %s")
                    self._err(lr, msg % short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions

    def _verify_dirstate(self):
        """Check that the dirstate is consistent with the parent's manifest"""
        repo = self.repo
        ui = self.ui
        ui.status(_(b"checking dirstate\n"))

        parent1, parent2 = repo.dirstate.parents()
        m1 = repo[parent1].manifest()
        m2 = repo[parent2].manifest()
        dirstate_errors = 0

        is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
        narrow_matcher = repo.narrowmatch() if is_narrow else None

        for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
            ui.error(err)
            dirstate_errors += 1

        if dirstate_errors:
            self.errors += dirstate_errors
        return dirstate_errors
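
The two `file_size(repo.store.vfs)` calls above are the verify.py side of this
changeset: instead of reading a pre-computed size attribute, the caller hands
in a vfs and the size is looked up only when it is actually needed. A minimal
sketch of what such an accessor could look like on the store side
(illustrative only: the `StoreFile` name, the caching slot and the
missing-file fallback here are assumptions, not the exact code from
mercurial/store.py):

    import attr  # Mercurial vendors this as mercurial.thirdparty.attr


    @attr.s(slots=True)
    class StoreFile:
        """a file found in the store, with a lazily computed size (sketch)"""

        unencoded_path = attr.ib()
        _file_size = attr.ib(default=None)

        def file_size(self, vfs):
            # vfs is assumed to expose stat(path) like Mercurial's vfs
            # objects; stat() the file the first time the size is requested
            # and cache it, treating a missing file as empty so callers can
            # simply skip it
            if self._file_size is None:
                try:
                    self._file_size = vfs.stat(self.unencoded_path).st_size
                except FileNotFoundError:
                    self._file_size = 0
            return self._file_size

Callers that already know the size can keep seeding it at construction time,
so only scans that have to discover sizes on the fly, such as the two above,
pay an extra stat().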