stream-clone: make it the responsibility of the store entry to stream content...
marmoute -
r51532:5e60abf8 default
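The changeset below adds a get_stream() helper to StoreFile and a get_streams() method to BaseStoreEntry, so stream-clone code can ask each store entry for (unencoded_path, content_iterator, size) triples instead of opening files itself. As a rough illustration only, a consumer might drive the new API along the lines of the following sketch; the emit_stream helper and the entries/copies arguments are hypothetical names chosen here, not part of the changeset:

def emit_stream(entries, vfs, copies):
    """Yield (name, size, chunk) for every file of every store entry.

    `entries` is assumed to come from something like store.walk(), and
    `copies` is assumed to map vfs.join(unencoded_path) to an on-disk copy
    of each file, matching how get_stream() looks paths up in `copies`.
    """
    for entry in entries:
        # get_streams() returns [(unencoded_path, content_iterator, size), ...]
        for name, stream, size in entry.get_streams(vfs, copies=copies):
            # the iterator yields the file content in chunks
            for chunk in stream:
                yield name, size, chunk
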
@@ -1,1127 +1,1159 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator, List
13 from typing import Generator, List
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 filelog,
22 filelog,
23 manifest,
23 manifest,
24 policy,
24 policy,
25 pycompat,
25 pycompat,
26 util,
26 util,
27 vfs as vfsmod,
27 vfs as vfsmod,
28 )
28 )
29 from .utils import hashutil
29 from .utils import hashutil
30
30
31 parsers = policy.importmod('parsers')
31 parsers = policy.importmod('parsers')
32 # how much bytes should be read from fncache in one read
32 # how much bytes should be read from fncache in one read
33 # It is done to prevent loading large fncache files into memory
33 # It is done to prevent loading large fncache files into memory
34 fncache_chunksize = 10 ** 6
34 fncache_chunksize = 10 ** 6
35
35
36
36
37 def _match_tracked_entry(entry, matcher):
37 def _match_tracked_entry(entry, matcher):
38 """parses a fncache entry and returns whether the entry is tracking a path
38 """parses a fncache entry and returns whether the entry is tracking a path
39 matched by matcher or not.
39 matched by matcher or not.
40
40
41 If matcher is None, returns True"""
41 If matcher is None, returns True"""
42
42
43 if matcher is None:
43 if matcher is None:
44 return True
44 return True
45 if entry.is_filelog:
45 if entry.is_filelog:
46 return matcher(entry.target_id)
46 return matcher(entry.target_id)
47 elif entry.is_manifestlog:
47 elif entry.is_manifestlog:
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50
50
51
51
52 # This avoids a collision between a file named foo and a dir named
52 # This avoids a collision between a file named foo and a dir named
53 # foo.i or foo.d
53 # foo.i or foo.d
54 def _encodedir(path):
54 def _encodedir(path):
55 """
55 """
56 >>> _encodedir(b'data/foo.i')
56 >>> _encodedir(b'data/foo.i')
57 'data/foo.i'
57 'data/foo.i'
58 >>> _encodedir(b'data/foo.i/bla.i')
58 >>> _encodedir(b'data/foo.i/bla.i')
59 'data/foo.i.hg/bla.i'
59 'data/foo.i.hg/bla.i'
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 'data/foo.i.hg.hg/bla.i'
61 'data/foo.i.hg.hg/bla.i'
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 """
64 """
65 return (
65 return (
66 path.replace(b".hg/", b".hg.hg/")
66 path.replace(b".hg/", b".hg.hg/")
67 .replace(b".i/", b".i.hg/")
67 .replace(b".i/", b".i.hg/")
68 .replace(b".d/", b".d.hg/")
68 .replace(b".d/", b".d.hg/")
69 )
69 )
70
70
71
71
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 encodedir = getattr(parsers, 'encodedir', _encodedir)
73
73
74
74
75 def decodedir(path):
75 def decodedir(path):
76 """
76 """
77 >>> decodedir(b'data/foo.i')
77 >>> decodedir(b'data/foo.i')
78 'data/foo.i'
78 'data/foo.i'
79 >>> decodedir(b'data/foo.i.hg/bla.i')
79 >>> decodedir(b'data/foo.i.hg/bla.i')
80 'data/foo.i/bla.i'
80 'data/foo.i/bla.i'
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
82 'data/foo.i.hg/bla.i'
82 'data/foo.i.hg/bla.i'
83 """
83 """
84 if b".hg/" not in path:
84 if b".hg/" not in path:
85 return path
85 return path
86 return (
86 return (
87 path.replace(b".d.hg/", b".d/")
87 path.replace(b".d.hg/", b".d/")
88 .replace(b".i.hg/", b".i/")
88 .replace(b".i.hg/", b".i/")
89 .replace(b".hg.hg/", b".hg/")
89 .replace(b".hg.hg/", b".hg/")
90 )
90 )
91
91
92
92
93 def _reserved():
93 def _reserved():
94 """characters that are problematic for filesystems
94 """characters that are problematic for filesystems
95
95
96 * ascii escapes (0..31)
96 * ascii escapes (0..31)
97 * ascii hi (126..255)
97 * ascii hi (126..255)
98 * windows specials
98 * windows specials
99
99
100 these characters will be escaped by encodefunctions
100 these characters will be escaped by encodefunctions
101 """
101 """
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 for x in range(32):
103 for x in range(32):
104 yield x
104 yield x
105 for x in range(126, 256):
105 for x in range(126, 256):
106 yield x
106 yield x
107 for x in winreserved:
107 for x in winreserved:
108 yield x
108 yield x
109
109
110
110
111 def _buildencodefun():
111 def _buildencodefun():
112 """
112 """
113 >>> enc, dec = _buildencodefun()
113 >>> enc, dec = _buildencodefun()
114
114
115 >>> enc(b'nothing/special.txt')
115 >>> enc(b'nothing/special.txt')
116 'nothing/special.txt'
116 'nothing/special.txt'
117 >>> dec(b'nothing/special.txt')
117 >>> dec(b'nothing/special.txt')
118 'nothing/special.txt'
118 'nothing/special.txt'
119
119
120 >>> enc(b'HELLO')
120 >>> enc(b'HELLO')
121 '_h_e_l_l_o'
121 '_h_e_l_l_o'
122 >>> dec(b'_h_e_l_l_o')
122 >>> dec(b'_h_e_l_l_o')
123 'HELLO'
123 'HELLO'
124
124
125 >>> enc(b'hello:world?')
125 >>> enc(b'hello:world?')
126 'hello~3aworld~3f'
126 'hello~3aworld~3f'
127 >>> dec(b'hello~3aworld~3f')
127 >>> dec(b'hello~3aworld~3f')
128 'hello:world?'
128 'hello:world?'
129
129
130 >>> enc(b'the\\x07quick\\xADshot')
130 >>> enc(b'the\\x07quick\\xADshot')
131 'the~07quick~adshot'
131 'the~07quick~adshot'
132 >>> dec(b'the~07quick~adshot')
132 >>> dec(b'the~07quick~adshot')
133 'the\\x07quick\\xadshot'
133 'the\\x07quick\\xadshot'
134 """
134 """
135 e = b'_'
135 e = b'_'
136 xchr = pycompat.bytechr
136 xchr = pycompat.bytechr
137 asciistr = list(map(xchr, range(127)))
137 asciistr = list(map(xchr, range(127)))
138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
139
139
140 cmap = {x: x for x in asciistr}
140 cmap = {x: x for x in asciistr}
141 for x in _reserved():
141 for x in _reserved():
142 cmap[xchr(x)] = b"~%02x" % x
142 cmap[xchr(x)] = b"~%02x" % x
143 for x in capitals + [ord(e)]:
143 for x in capitals + [ord(e)]:
144 cmap[xchr(x)] = e + xchr(x).lower()
144 cmap[xchr(x)] = e + xchr(x).lower()
145
145
146 dmap = {}
146 dmap = {}
147 for k, v in cmap.items():
147 for k, v in cmap.items():
148 dmap[v] = k
148 dmap[v] = k
149
149
150 def decode(s):
150 def decode(s):
151 i = 0
151 i = 0
152 while i < len(s):
152 while i < len(s):
153 for l in range(1, 4):
153 for l in range(1, 4):
154 try:
154 try:
155 yield dmap[s[i : i + l]]
155 yield dmap[s[i : i + l]]
156 i += l
156 i += l
157 break
157 break
158 except KeyError:
158 except KeyError:
159 pass
159 pass
160 else:
160 else:
161 raise KeyError
161 raise KeyError
162
162
163 return (
163 return (
164 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
165 lambda s: b''.join(list(decode(s))),
165 lambda s: b''.join(list(decode(s))),
166 )
166 )
167
167
168
168
169 _encodefname, _decodefname = _buildencodefun()
169 _encodefname, _decodefname = _buildencodefun()
170
170
171
171
172 def encodefilename(s):
172 def encodefilename(s):
173 """
173 """
174 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
175 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
176 """
176 """
177 return _encodefname(encodedir(s))
177 return _encodefname(encodedir(s))
178
178
179
179
180 def decodefilename(s):
180 def decodefilename(s):
181 """
181 """
182 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
183 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
184 """
184 """
185 return decodedir(_decodefname(s))
185 return decodedir(_decodefname(s))
186
186
187
187
188 def _buildlowerencodefun():
188 def _buildlowerencodefun():
189 """
189 """
190 >>> f = _buildlowerencodefun()
190 >>> f = _buildlowerencodefun()
191 >>> f(b'nothing/special.txt')
191 >>> f(b'nothing/special.txt')
192 'nothing/special.txt'
192 'nothing/special.txt'
193 >>> f(b'HELLO')
193 >>> f(b'HELLO')
194 'hello'
194 'hello'
195 >>> f(b'hello:world?')
195 >>> f(b'hello:world?')
196 'hello~3aworld~3f'
196 'hello~3aworld~3f'
197 >>> f(b'the\\x07quick\\xADshot')
197 >>> f(b'the\\x07quick\\xADshot')
198 'the~07quick~adshot'
198 'the~07quick~adshot'
199 """
199 """
200 xchr = pycompat.bytechr
200 xchr = pycompat.bytechr
201 cmap = {xchr(x): xchr(x) for x in range(127)}
201 cmap = {xchr(x): xchr(x) for x in range(127)}
202 for x in _reserved():
202 for x in _reserved():
203 cmap[xchr(x)] = b"~%02x" % x
203 cmap[xchr(x)] = b"~%02x" % x
204 for x in range(ord(b"A"), ord(b"Z") + 1):
204 for x in range(ord(b"A"), ord(b"Z") + 1):
205 cmap[xchr(x)] = xchr(x).lower()
205 cmap[xchr(x)] = xchr(x).lower()
206
206
207 def lowerencode(s):
207 def lowerencode(s):
208 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
209
209
210 return lowerencode
210 return lowerencode
211
211
212
212
213 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
214
214
215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
218
218
219
219
220 def _auxencode(path, dotencode):
220 def _auxencode(path, dotencode):
221 """
221 """
222 Encodes filenames containing names reserved by Windows or which end in
222 Encodes filenames containing names reserved by Windows or which end in
223 period or space. Does not touch other single reserved characters c.
223 period or space. Does not touch other single reserved characters c.
224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
225 Additionally encodes space or period at the beginning, if dotencode is
225 Additionally encodes space or period at the beginning, if dotencode is
226 True. Parameter path is assumed to be all lowercase.
226 True. Parameter path is assumed to be all lowercase.
227 A segment only needs encoding if a reserved name appears as a
227 A segment only needs encoding if a reserved name appears as a
228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
229 doesn't need encoding.
229 doesn't need encoding.
230
230
231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
232 >>> _auxencode(s.split(b'/'), True)
232 >>> _auxencode(s.split(b'/'), True)
233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
235 >>> _auxencode(s.split(b'/'), False)
235 >>> _auxencode(s.split(b'/'), False)
236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
237 >>> _auxencode([b'foo. '], True)
237 >>> _auxencode([b'foo. '], True)
238 ['foo.~20']
238 ['foo.~20']
239 >>> _auxencode([b' .foo'], True)
239 >>> _auxencode([b' .foo'], True)
240 ['~20.foo']
240 ['~20.foo']
241 """
241 """
242 for i, n in enumerate(path):
242 for i, n in enumerate(path):
243 if not n:
243 if not n:
244 continue
244 continue
245 if dotencode and n[0] in b'. ':
245 if dotencode and n[0] in b'. ':
246 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 n = b"~%02x" % ord(n[0:1]) + n[1:]
247 path[i] = n
247 path[i] = n
248 else:
248 else:
249 l = n.find(b'.')
249 l = n.find(b'.')
250 if l == -1:
250 if l == -1:
251 l = len(n)
251 l = len(n)
252 if (l == 3 and n[:3] in _winres3) or (
252 if (l == 3 and n[:3] in _winres3) or (
253 l == 4
253 l == 4
254 and n[3:4] <= b'9'
254 and n[3:4] <= b'9'
255 and n[3:4] >= b'1'
255 and n[3:4] >= b'1'
256 and n[:3] in _winres4
256 and n[:3] in _winres4
257 ):
257 ):
258 # encode third letter ('aux' -> 'au~78')
258 # encode third letter ('aux' -> 'au~78')
259 ec = b"~%02x" % ord(n[2:3])
259 ec = b"~%02x" % ord(n[2:3])
260 n = n[0:2] + ec + n[3:]
260 n = n[0:2] + ec + n[3:]
261 path[i] = n
261 path[i] = n
262 if n[-1] in b'. ':
262 if n[-1] in b'. ':
263 # encode last period or space ('foo...' -> 'foo..~2e')
263 # encode last period or space ('foo...' -> 'foo..~2e')
264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
265 return path
265 return path
266
266
267
267
268 _maxstorepathlen = 120
268 _maxstorepathlen = 120
269 _dirprefixlen = 8
269 _dirprefixlen = 8
270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271
271
272
272
273 def _hashencode(path, dotencode):
273 def _hashencode(path, dotencode):
274 digest = hex(hashutil.sha1(path).digest())
274 digest = hex(hashutil.sha1(path).digest())
275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
276 parts = _auxencode(le, dotencode)
276 parts = _auxencode(le, dotencode)
277 basename = parts[-1]
277 basename = parts[-1]
278 _root, ext = os.path.splitext(basename)
278 _root, ext = os.path.splitext(basename)
279 sdirs = []
279 sdirs = []
280 sdirslen = 0
280 sdirslen = 0
281 for p in parts[:-1]:
281 for p in parts[:-1]:
282 d = p[:_dirprefixlen]
282 d = p[:_dirprefixlen]
283 if d[-1] in b'. ':
283 if d[-1] in b'. ':
284 # Windows can't access dirs ending in period or space
284 # Windows can't access dirs ending in period or space
285 d = d[:-1] + b'_'
285 d = d[:-1] + b'_'
286 if sdirslen == 0:
286 if sdirslen == 0:
287 t = len(d)
287 t = len(d)
288 else:
288 else:
289 t = sdirslen + 1 + len(d)
289 t = sdirslen + 1 + len(d)
290 if t > _maxshortdirslen:
290 if t > _maxshortdirslen:
291 break
291 break
292 sdirs.append(d)
292 sdirs.append(d)
293 sdirslen = t
293 sdirslen = t
294 dirs = b'/'.join(sdirs)
294 dirs = b'/'.join(sdirs)
295 if len(dirs) > 0:
295 if len(dirs) > 0:
296 dirs += b'/'
296 dirs += b'/'
297 res = b'dh/' + dirs + digest + ext
297 res = b'dh/' + dirs + digest + ext
298 spaceleft = _maxstorepathlen - len(res)
298 spaceleft = _maxstorepathlen - len(res)
299 if spaceleft > 0:
299 if spaceleft > 0:
300 filler = basename[:spaceleft]
300 filler = basename[:spaceleft]
301 res = b'dh/' + dirs + filler + digest + ext
301 res = b'dh/' + dirs + filler + digest + ext
302 return res
302 return res
303
303
304
304
305 def _hybridencode(path, dotencode):
305 def _hybridencode(path, dotencode):
306 """encodes path with a length limit
306 """encodes path with a length limit
307
307
308 Encodes all paths that begin with 'data/', according to the following.
308 Encodes all paths that begin with 'data/', according to the following.
309
309
310 Default encoding (reversible):
310 Default encoding (reversible):
311
311
312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
313 characters are encoded as '~xx', where xx is the two digit hex code
313 characters are encoded as '~xx', where xx is the two digit hex code
314 of the character (see encodefilename).
314 of the character (see encodefilename).
315 Relevant path components consisting of Windows reserved filenames are
315 Relevant path components consisting of Windows reserved filenames are
316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
317
317
318 Hashed encoding (not reversible):
318 Hashed encoding (not reversible):
319
319
320 If the default-encoded path is longer than _maxstorepathlen, a
320 If the default-encoded path is longer than _maxstorepathlen, a
321 non-reversible hybrid hashing of the path is done instead.
321 non-reversible hybrid hashing of the path is done instead.
322 This encoding uses up to _dirprefixlen characters of all directory
322 This encoding uses up to _dirprefixlen characters of all directory
323 levels of the lowerencoded path, but not more levels than can fit into
323 levels of the lowerencoded path, but not more levels than can fit into
324 _maxshortdirslen.
324 _maxshortdirslen.
325 Then follows the filler followed by the sha digest of the full path.
325 Then follows the filler followed by the sha digest of the full path.
326 The filler is the beginning of the basename of the lowerencoded path
326 The filler is the beginning of the basename of the lowerencoded path
327 (the basename is everything after the last path separator). The filler
327 (the basename is everything after the last path separator). The filler
328 is as long as possible, filling in characters from the basename until
328 is as long as possible, filling in characters from the basename until
329 the encoded path has _maxstorepathlen characters (or all chars of the
329 the encoded path has _maxstorepathlen characters (or all chars of the
330 basename have been taken).
330 basename have been taken).
331 The extension (e.g. '.i' or '.d') is preserved.
331 The extension (e.g. '.i' or '.d') is preserved.
332
332
333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
334 encoding was used.
334 encoding was used.
335 """
335 """
336 path = encodedir(path)
336 path = encodedir(path)
337 ef = _encodefname(path).split(b'/')
337 ef = _encodefname(path).split(b'/')
338 res = b'/'.join(_auxencode(ef, dotencode))
338 res = b'/'.join(_auxencode(ef, dotencode))
339 if len(res) > _maxstorepathlen:
339 if len(res) > _maxstorepathlen:
340 res = _hashencode(path, dotencode)
340 res = _hashencode(path, dotencode)
341 return res
341 return res
342
342
343
343
344 def _pathencode(path):
344 def _pathencode(path):
345 de = encodedir(path)
345 de = encodedir(path)
346 if len(path) > _maxstorepathlen:
346 if len(path) > _maxstorepathlen:
347 return _hashencode(de, True)
347 return _hashencode(de, True)
348 ef = _encodefname(de).split(b'/')
348 ef = _encodefname(de).split(b'/')
349 res = b'/'.join(_auxencode(ef, True))
349 res = b'/'.join(_auxencode(ef, True))
350 if len(res) > _maxstorepathlen:
350 if len(res) > _maxstorepathlen:
351 return _hashencode(de, True)
351 return _hashencode(de, True)
352 return res
352 return res
353
353
354
354
355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 _pathencode = getattr(parsers, 'pathencode', _pathencode)
356
356
357
357
358 def _plainhybridencode(f):
358 def _plainhybridencode(f):
359 return _hybridencode(f, False)
359 return _hybridencode(f, False)
360
360
361
361
362 def _calcmode(vfs):
362 def _calcmode(vfs):
363 try:
363 try:
364 # files in .hg/ will be created using this mode
364 # files in .hg/ will be created using this mode
365 mode = vfs.stat().st_mode
365 mode = vfs.stat().st_mode
366 # avoid some useless chmods
366 # avoid some useless chmods
367 if (0o777 & ~util.umask) == (0o777 & mode):
367 if (0o777 & ~util.umask) == (0o777 & mode):
368 mode = None
368 mode = None
369 except OSError:
369 except OSError:
370 mode = None
370 mode = None
371 return mode
371 return mode
372
372
373
373
374 _data = [
374 _data = [
375 b'bookmarks',
375 b'bookmarks',
376 b'narrowspec',
376 b'narrowspec',
377 b'data',
377 b'data',
378 b'meta',
378 b'meta',
379 b'00manifest.d',
379 b'00manifest.d',
380 b'00manifest.i',
380 b'00manifest.i',
381 b'00changelog.d',
381 b'00changelog.d',
382 b'00changelog.i',
382 b'00changelog.i',
383 b'phaseroots',
383 b'phaseroots',
384 b'obsstore',
384 b'obsstore',
385 b'requires',
385 b'requires',
386 ]
386 ]
387
387
388 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 REVLOG_FILES_MAIN_EXT = (b'.i',)
389 REVLOG_FILES_OTHER_EXT = (
389 REVLOG_FILES_OTHER_EXT = (
390 b'.idx',
390 b'.idx',
391 b'.d',
391 b'.d',
392 b'.dat',
392 b'.dat',
393 b'.n',
393 b'.n',
394 b'.nd',
394 b'.nd',
395 b'.sda',
395 b'.sda',
396 )
396 )
397 # file extension that also use a `-SOMELONGIDHASH.ext` form
397 # file extension that also use a `-SOMELONGIDHASH.ext` form
398 REVLOG_FILES_LONG_EXT = (
398 REVLOG_FILES_LONG_EXT = (
399 b'.nd',
399 b'.nd',
400 b'.idx',
400 b'.idx',
401 b'.dat',
401 b'.dat',
402 b'.sda',
402 b'.sda',
403 )
403 )
404 # files that are "volatile" and might change between listing and streaming
404 # files that are "volatile" and might change between listing and streaming
405 #
405 #
406 # note: the ".nd" file are nodemap data and won't "change" but they might be
406 # note: the ".nd" file are nodemap data and won't "change" but they might be
407 # deleted.
407 # deleted.
408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
409
409
410 # some exception to the above matching
410 # some exception to the above matching
411 #
411 #
412 # XXX This is currently not in use because of issue6542
412 # XXX This is currently not in use because of issue6542
413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414
414
415
415
416 def is_revlog(f, kind, st):
416 def is_revlog(f, kind, st):
417 if kind != stat.S_IFREG:
417 if kind != stat.S_IFREG:
418 return None
418 return None
419 return revlog_type(f)
419 return revlog_type(f)
420
420
421
421
422 def revlog_type(f):
422 def revlog_type(f):
423 # XXX we need to filter `undo.` created by the transaction here, however
423 # XXX we need to filter `undo.` created by the transaction here, however
424 # being naive about it also filter revlog for `undo.*` files, leading to
424 # being naive about it also filter revlog for `undo.*` files, leading to
425 # issue6542. So we no longer use EXCLUDED.
425 # issue6542. So we no longer use EXCLUDED.
426 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 if f.endswith(REVLOG_FILES_MAIN_EXT):
427 return FILEFLAGS_REVLOG_MAIN
427 return FILEFLAGS_REVLOG_MAIN
428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 elif f.endswith(REVLOG_FILES_OTHER_EXT):
429 t = FILETYPE_FILELOG_OTHER
429 t = FILETYPE_FILELOG_OTHER
430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
431 t |= FILEFLAGS_VOLATILE
431 t |= FILEFLAGS_VOLATILE
432 return t
432 return t
433 return None
433 return None
434
434
435
435
436 # the file is part of changelog data
436 # the file is part of changelog data
437 FILEFLAGS_CHANGELOG = 1 << 13
437 FILEFLAGS_CHANGELOG = 1 << 13
438 # the file is part of manifest data
438 # the file is part of manifest data
439 FILEFLAGS_MANIFESTLOG = 1 << 12
439 FILEFLAGS_MANIFESTLOG = 1 << 12
440 # the file is part of filelog data
440 # the file is part of filelog data
441 FILEFLAGS_FILELOG = 1 << 11
441 FILEFLAGS_FILELOG = 1 << 11
442 # file that are not directly part of a revlog
442 # file that are not directly part of a revlog
443 FILEFLAGS_OTHER = 1 << 10
443 FILEFLAGS_OTHER = 1 << 10
444
444
445 # the main entry point for a revlog
445 # the main entry point for a revlog
446 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 FILEFLAGS_REVLOG_MAIN = 1 << 1
447 # a secondary file for a revlog
447 # a secondary file for a revlog
448 FILEFLAGS_REVLOG_OTHER = 1 << 0
448 FILEFLAGS_REVLOG_OTHER = 1 << 0
449
449
450 # files that are "volatile" and might change between listing and streaming
450 # files that are "volatile" and might change between listing and streaming
451 FILEFLAGS_VOLATILE = 1 << 20
451 FILEFLAGS_VOLATILE = 1 << 20
452
452
453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
459 FILETYPE_OTHER = FILEFLAGS_OTHER
459 FILETYPE_OTHER = FILEFLAGS_OTHER
460
460
461
461
462 @attr.s(slots=True)
462 @attr.s(slots=True)
463 class StoreFile:
463 class StoreFile:
464 """a file matching a store entry"""
464 """a file matching a store entry"""
465
465
466 unencoded_path = attr.ib()
466 unencoded_path = attr.ib()
467 _file_size = attr.ib(default=None)
467 _file_size = attr.ib(default=None)
468 is_volatile = attr.ib(default=False)
468 is_volatile = attr.ib(default=False)
469
469
470 def file_size(self, vfs):
470 def file_size(self, vfs):
471 if self._file_size is None:
471 if self._file_size is None:
472 if vfs is None:
473 msg = b"calling vfs-less file_size without prior call: %s"
474 msg %= self.unencoded_path
475 raise error.ProgrammingError(msg)
472 try:
476 try:
473 self._file_size = vfs.stat(self.unencoded_path).st_size
477 self._file_size = vfs.stat(self.unencoded_path).st_size
474 except FileNotFoundError:
478 except FileNotFoundError:
475 self._file_size = 0
479 self._file_size = 0
476 return self._file_size
480 return self._file_size
477
481
482 def get_stream(self, vfs, copies):
483 """return data "stream" information for this file
484
485 (unencoded_file_path, content_iterator, content_size)
486 """
487 size = self.file_size(None)
488
489 def get_stream():
490 actual_path = copies[vfs.join(self.unencoded_path)]
491 with open(actual_path, 'rb') as fp:
492 yield None # ready to stream
493 if size <= 65536:
494 yield fp.read(size)
495 else:
496 yield from util.filechunkiter(fp, limit=size)
497
498 s = get_stream()
499 next(s)
500 return (self.unencoded_path, s, size)
501
478
502
479 @attr.s(slots=True, init=False)
503 @attr.s(slots=True, init=False)
480 class BaseStoreEntry:
504 class BaseStoreEntry:
481 """An entry in the store
505 """An entry in the store
482
506
483 This is returned by `store.walk` and represent some data in the store."""
507 This is returned by `store.walk` and represent some data in the store."""
484
508
485 def files(self) -> List[StoreFile]:
509 def files(self) -> List[StoreFile]:
486 raise NotImplementedError
510 raise NotImplementedError
487
511
512 def get_streams(self, vfs, copies=None):
513 """return a list of data stream associated to files for this entry
514
515 return [(unencoded_file_path, content_iterator, content_size), …]
516 """
517 assert vfs is not None
518 return [f.get_stream(vfs, copies) for f in self.files()]
519
488
520
489 @attr.s(slots=True, init=False)
521 @attr.s(slots=True, init=False)
490 class SimpleStoreEntry(BaseStoreEntry):
522 class SimpleStoreEntry(BaseStoreEntry):
491 """A generic entry in the store"""
523 """A generic entry in the store"""
492
524
493 is_revlog = False
525 is_revlog = False
494
526
495 _entry_path = attr.ib()
527 _entry_path = attr.ib()
496 _is_volatile = attr.ib(default=False)
528 _is_volatile = attr.ib(default=False)
497 _file_size = attr.ib(default=None)
529 _file_size = attr.ib(default=None)
498 _files = attr.ib(default=None)
530 _files = attr.ib(default=None)
499
531
500 def __init__(
532 def __init__(
501 self,
533 self,
502 entry_path,
534 entry_path,
503 is_volatile=False,
535 is_volatile=False,
504 file_size=None,
536 file_size=None,
505 ):
537 ):
506 super().__init__()
538 super().__init__()
507 self._entry_path = entry_path
539 self._entry_path = entry_path
508 self._is_volatile = is_volatile
540 self._is_volatile = is_volatile
509 self._file_size = file_size
541 self._file_size = file_size
510 self._files = None
542 self._files = None
511
543
512 def files(self) -> List[StoreFile]:
544 def files(self) -> List[StoreFile]:
513 if self._files is None:
545 if self._files is None:
514 self._files = [
546 self._files = [
515 StoreFile(
547 StoreFile(
516 unencoded_path=self._entry_path,
548 unencoded_path=self._entry_path,
517 file_size=self._file_size,
549 file_size=self._file_size,
518 is_volatile=self._is_volatile,
550 is_volatile=self._is_volatile,
519 )
551 )
520 ]
552 ]
521 return self._files
553 return self._files
522
554
523
555
524 @attr.s(slots=True, init=False)
556 @attr.s(slots=True, init=False)
525 class RevlogStoreEntry(BaseStoreEntry):
557 class RevlogStoreEntry(BaseStoreEntry):
526 """A revlog entry in the store"""
558 """A revlog entry in the store"""
527
559
528 is_revlog = True
560 is_revlog = True
529
561
530 revlog_type = attr.ib(default=None)
562 revlog_type = attr.ib(default=None)
531 target_id = attr.ib(default=None)
563 target_id = attr.ib(default=None)
532 _path_prefix = attr.ib(default=None)
564 _path_prefix = attr.ib(default=None)
533 _details = attr.ib(default=None)
565 _details = attr.ib(default=None)
534 _files = attr.ib(default=None)
566 _files = attr.ib(default=None)
535
567
536 def __init__(
568 def __init__(
537 self,
569 self,
538 revlog_type,
570 revlog_type,
539 path_prefix,
571 path_prefix,
540 target_id,
572 target_id,
541 details,
573 details,
542 ):
574 ):
543 super().__init__()
575 super().__init__()
544 self.revlog_type = revlog_type
576 self.revlog_type = revlog_type
545 self.target_id = target_id
577 self.target_id = target_id
546 self._path_prefix = path_prefix
578 self._path_prefix = path_prefix
547 assert b'.i' in details, (path_prefix, details)
579 assert b'.i' in details, (path_prefix, details)
548 self._details = details
580 self._details = details
549 self._files = None
581 self._files = None
550
582
551 @property
583 @property
552 def is_changelog(self):
584 def is_changelog(self):
553 return self.revlog_type & FILEFLAGS_CHANGELOG
585 return self.revlog_type & FILEFLAGS_CHANGELOG
554
586
555 @property
587 @property
556 def is_manifestlog(self):
588 def is_manifestlog(self):
557 return self.revlog_type & FILEFLAGS_MANIFESTLOG
589 return self.revlog_type & FILEFLAGS_MANIFESTLOG
558
590
559 @property
591 @property
560 def is_filelog(self):
592 def is_filelog(self):
561 return self.revlog_type & FILEFLAGS_FILELOG
593 return self.revlog_type & FILEFLAGS_FILELOG
562
594
563 def main_file_path(self):
595 def main_file_path(self):
564 """unencoded path of the main revlog file"""
596 """unencoded path of the main revlog file"""
565 return self._path_prefix + b'.i'
597 return self._path_prefix + b'.i'
566
598
567 def files(self) -> List[StoreFile]:
599 def files(self) -> List[StoreFile]:
568 if self._files is None:
600 if self._files is None:
569 self._files = []
601 self._files = []
570 for ext in sorted(self._details, key=_ext_key):
602 for ext in sorted(self._details, key=_ext_key):
571 path = self._path_prefix + ext
603 path = self._path_prefix + ext
572 data = self._details[ext]
604 data = self._details[ext]
573 self._files.append(StoreFile(unencoded_path=path, **data))
605 self._files.append(StoreFile(unencoded_path=path, **data))
574 return self._files
606 return self._files
575
607
576 def get_revlog_instance(self, repo):
608 def get_revlog_instance(self, repo):
577 """Obtain a revlog instance from this store entry
609 """Obtain a revlog instance from this store entry
578
610
579 An instance of the appropriate class is returned.
611 An instance of the appropriate class is returned.
580 """
612 """
581 if self.is_changelog:
613 if self.is_changelog:
582 return changelog.changelog(repo.svfs)
614 return changelog.changelog(repo.svfs)
583 elif self.is_manifestlog:
615 elif self.is_manifestlog:
584 mandir = self.target_id
616 mandir = self.target_id
585 return manifest.manifestrevlog(
617 return manifest.manifestrevlog(
586 repo.nodeconstants, repo.svfs, tree=mandir
618 repo.nodeconstants, repo.svfs, tree=mandir
587 )
619 )
588 else:
620 else:
589 return filelog.filelog(repo.svfs, self.target_id)
621 return filelog.filelog(repo.svfs, self.target_id)
590
622
591
623
592 def _gather_revlog(files_data):
624 def _gather_revlog(files_data):
593 """group files per revlog prefix
625 """group files per revlog prefix
594
626
595 The returns a two level nested dict. The top level key is the revlog prefix
627 The returns a two level nested dict. The top level key is the revlog prefix
596 without extension, the second level is all the file "suffix" that were
628 without extension, the second level is all the file "suffix" that were
597 seen for this revlog and arbitrary file data as value.
629 seen for this revlog and arbitrary file data as value.
598 """
630 """
599 revlogs = collections.defaultdict(dict)
631 revlogs = collections.defaultdict(dict)
600 for u, value in files_data:
632 for u, value in files_data:
601 name, ext = _split_revlog_ext(u)
633 name, ext = _split_revlog_ext(u)
602 revlogs[name][ext] = value
634 revlogs[name][ext] = value
603 return sorted(revlogs.items())
635 return sorted(revlogs.items())
604
636
605
637
606 def _split_revlog_ext(filename):
638 def _split_revlog_ext(filename):
607 """split the revlog file prefix from the variable extension"""
639 """split the revlog file prefix from the variable extension"""
608 if filename.endswith(REVLOG_FILES_LONG_EXT):
640 if filename.endswith(REVLOG_FILES_LONG_EXT):
609 char = b'-'
641 char = b'-'
610 else:
642 else:
611 char = b'.'
643 char = b'.'
612 idx = filename.rfind(char)
644 idx = filename.rfind(char)
613 return filename[:idx], filename[idx:]
645 return filename[:idx], filename[idx:]
614
646
615
647
616 def _ext_key(ext):
648 def _ext_key(ext):
617 """a key to order revlog suffix
649 """a key to order revlog suffix
618
650
619 important to issue .i after other entry."""
651 important to issue .i after other entry."""
620 # the only important part of this order is to keep the `.i` last.
652 # the only important part of this order is to keep the `.i` last.
621 if ext.endswith(b'.n'):
653 if ext.endswith(b'.n'):
622 return (0, ext)
654 return (0, ext)
623 elif ext.endswith(b'.nd'):
655 elif ext.endswith(b'.nd'):
624 return (10, ext)
656 return (10, ext)
625 elif ext.endswith(b'.d'):
657 elif ext.endswith(b'.d'):
626 return (20, ext)
658 return (20, ext)
627 elif ext.endswith(b'.i'):
659 elif ext.endswith(b'.i'):
628 return (50, ext)
660 return (50, ext)
629 else:
661 else:
630 return (40, ext)
662 return (40, ext)
631
663
632
664
633 class basicstore:
665 class basicstore:
634 '''base class for local repository stores'''
666 '''base class for local repository stores'''
635
667
636 def __init__(self, path, vfstype):
668 def __init__(self, path, vfstype):
637 vfs = vfstype(path)
669 vfs = vfstype(path)
638 self.path = vfs.base
670 self.path = vfs.base
639 self.createmode = _calcmode(vfs)
671 self.createmode = _calcmode(vfs)
640 vfs.createmode = self.createmode
672 vfs.createmode = self.createmode
641 self.rawvfs = vfs
673 self.rawvfs = vfs
642 self.vfs = vfsmod.filtervfs(vfs, encodedir)
674 self.vfs = vfsmod.filtervfs(vfs, encodedir)
643 self.opener = self.vfs
675 self.opener = self.vfs
644
676
645 def join(self, f):
677 def join(self, f):
646 return self.path + b'/' + encodedir(f)
678 return self.path + b'/' + encodedir(f)
647
679
648 def _walk(self, relpath, recurse, undecodable=None):
680 def _walk(self, relpath, recurse, undecodable=None):
649 '''yields (revlog_type, unencoded, size)'''
681 '''yields (revlog_type, unencoded, size)'''
650 path = self.path
682 path = self.path
651 if relpath:
683 if relpath:
652 path += b'/' + relpath
684 path += b'/' + relpath
653 striplen = len(self.path) + 1
685 striplen = len(self.path) + 1
654 l = []
686 l = []
655 if self.rawvfs.isdir(path):
687 if self.rawvfs.isdir(path):
656 visit = [path]
688 visit = [path]
657 readdir = self.rawvfs.readdir
689 readdir = self.rawvfs.readdir
658 while visit:
690 while visit:
659 p = visit.pop()
691 p = visit.pop()
660 for f, kind, st in readdir(p, stat=True):
692 for f, kind, st in readdir(p, stat=True):
661 fp = p + b'/' + f
693 fp = p + b'/' + f
662 rl_type = is_revlog(f, kind, st)
694 rl_type = is_revlog(f, kind, st)
663 if rl_type is not None:
695 if rl_type is not None:
664 n = util.pconvert(fp[striplen:])
696 n = util.pconvert(fp[striplen:])
665 l.append((decodedir(n), (rl_type, st.st_size)))
697 l.append((decodedir(n), (rl_type, st.st_size)))
666 elif kind == stat.S_IFDIR and recurse:
698 elif kind == stat.S_IFDIR and recurse:
667 visit.append(fp)
699 visit.append(fp)
668
700
669 l.sort()
701 l.sort()
670 return l
702 return l
671
703
672 def changelog(self, trypending, concurrencychecker=None):
704 def changelog(self, trypending, concurrencychecker=None):
673 return changelog.changelog(
705 return changelog.changelog(
674 self.vfs,
706 self.vfs,
675 trypending=trypending,
707 trypending=trypending,
676 concurrencychecker=concurrencychecker,
708 concurrencychecker=concurrencychecker,
677 )
709 )
678
710
679 def manifestlog(self, repo, storenarrowmatch):
711 def manifestlog(self, repo, storenarrowmatch):
680 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
712 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
681 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
713 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
682
714
683 def data_entries(
715 def data_entries(
684 self, matcher=None, undecodable=None
716 self, matcher=None, undecodable=None
685 ) -> Generator[BaseStoreEntry, None, None]:
717 ) -> Generator[BaseStoreEntry, None, None]:
686 """Like walk, but excluding the changelog and root manifest.
718 """Like walk, but excluding the changelog and root manifest.
687
719
688 When [undecodable] is None, revlogs names that can't be
720 When [undecodable] is None, revlogs names that can't be
689 decoded cause an exception. When it is provided, it should
721 decoded cause an exception. When it is provided, it should
690 be a list and the filenames that can't be decoded are added
722 be a list and the filenames that can't be decoded are added
691 to it instead. This is very rarely needed."""
723 to it instead. This is very rarely needed."""
692 dirs = [
724 dirs = [
693 (b'data', FILEFLAGS_FILELOG, False),
725 (b'data', FILEFLAGS_FILELOG, False),
694 (b'meta', FILEFLAGS_MANIFESTLOG, True),
726 (b'meta', FILEFLAGS_MANIFESTLOG, True),
695 ]
727 ]
696 for base_dir, rl_type, strip_filename in dirs:
728 for base_dir, rl_type, strip_filename in dirs:
697 files = self._walk(base_dir, True, undecodable=undecodable)
729 files = self._walk(base_dir, True, undecodable=undecodable)
698 files = (f for f in files if f[1][0] is not None)
730 files = (f for f in files if f[1][0] is not None)
699 for revlog, details in _gather_revlog(files):
731 for revlog, details in _gather_revlog(files):
700 file_details = {}
732 file_details = {}
701 revlog_target_id = revlog.split(b'/', 1)[1]
733 revlog_target_id = revlog.split(b'/', 1)[1]
702 if strip_filename and b'/' in revlog:
734 if strip_filename and b'/' in revlog:
703 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
735 revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
704 revlog_target_id += b'/'
736 revlog_target_id += b'/'
705 for ext, (t, s) in sorted(details.items()):
737 for ext, (t, s) in sorted(details.items()):
706 file_details[ext] = {
738 file_details[ext] = {
707 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
739 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
708 'file_size': s,
740 'file_size': s,
709 }
741 }
710 yield RevlogStoreEntry(
742 yield RevlogStoreEntry(
711 path_prefix=revlog,
743 path_prefix=revlog,
712 revlog_type=rl_type,
744 revlog_type=rl_type,
713 target_id=revlog_target_id,
745 target_id=revlog_target_id,
714 details=file_details,
746 details=file_details,
715 )
747 )
716
748
717 def top_entries(
749 def top_entries(
718 self, phase=False, obsolescence=False
750 self, phase=False, obsolescence=False
719 ) -> Generator[BaseStoreEntry, None, None]:
751 ) -> Generator[BaseStoreEntry, None, None]:
720 if phase and self.vfs.exists(b'phaseroots'):
752 if phase and self.vfs.exists(b'phaseroots'):
721 yield SimpleStoreEntry(
753 yield SimpleStoreEntry(
722 entry_path=b'phaseroots',
754 entry_path=b'phaseroots',
723 is_volatile=True,
755 is_volatile=True,
724 )
756 )
725
757
726 if obsolescence and self.vfs.exists(b'obsstore'):
758 if obsolescence and self.vfs.exists(b'obsstore'):
727 # XXX if we had the file size it could be non-volatile
759 # XXX if we had the file size it could be non-volatile
728 yield SimpleStoreEntry(
760 yield SimpleStoreEntry(
729 entry_path=b'obsstore',
761 entry_path=b'obsstore',
730 is_volatile=True,
762 is_volatile=True,
731 )
763 )
732
764
733 files = reversed(self._walk(b'', False))
765 files = reversed(self._walk(b'', False))
734
766
735 changelogs = collections.defaultdict(dict)
767 changelogs = collections.defaultdict(dict)
736 manifestlogs = collections.defaultdict(dict)
768 manifestlogs = collections.defaultdict(dict)
737
769
738 for u, (t, s) in files:
770 for u, (t, s) in files:
739 if u.startswith(b'00changelog'):
771 if u.startswith(b'00changelog'):
740 name, ext = _split_revlog_ext(u)
772 name, ext = _split_revlog_ext(u)
741 changelogs[name][ext] = (t, s)
773 changelogs[name][ext] = (t, s)
742 elif u.startswith(b'00manifest'):
774 elif u.startswith(b'00manifest'):
743 name, ext = _split_revlog_ext(u)
775 name, ext = _split_revlog_ext(u)
744 manifestlogs[name][ext] = (t, s)
776 manifestlogs[name][ext] = (t, s)
745 else:
777 else:
746 yield SimpleStoreEntry(
778 yield SimpleStoreEntry(
747 entry_path=u,
779 entry_path=u,
748 is_volatile=bool(t & FILEFLAGS_VOLATILE),
780 is_volatile=bool(t & FILEFLAGS_VOLATILE),
749 file_size=s,
781 file_size=s,
750 )
782 )
751 # yield manifest before changelog
783 # yield manifest before changelog
752 top_rl = [
784 top_rl = [
753 (manifestlogs, FILEFLAGS_MANIFESTLOG),
785 (manifestlogs, FILEFLAGS_MANIFESTLOG),
754 (changelogs, FILEFLAGS_CHANGELOG),
786 (changelogs, FILEFLAGS_CHANGELOG),
755 ]
787 ]
756 assert len(manifestlogs) <= 1
788 assert len(manifestlogs) <= 1
757 assert len(changelogs) <= 1
789 assert len(changelogs) <= 1
758 for data, revlog_type in top_rl:
790 for data, revlog_type in top_rl:
759 for revlog, details in sorted(data.items()):
791 for revlog, details in sorted(data.items()):
760 file_details = {}
792 file_details = {}
761 for ext, (t, s) in details.items():
793 for ext, (t, s) in details.items():
762 file_details[ext] = {
794 file_details[ext] = {
763 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
795 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
764 'file_size': s,
796 'file_size': s,
765 }
797 }
766 yield RevlogStoreEntry(
798 yield RevlogStoreEntry(
767 path_prefix=revlog,
799 path_prefix=revlog,
768 revlog_type=revlog_type,
800 revlog_type=revlog_type,
769 target_id=b'',
801 target_id=b'',
770 details=file_details,
802 details=file_details,
771 )
803 )
772
804
773 def walk(
805 def walk(
774 self, matcher=None, phase=False, obsolescence=False
806 self, matcher=None, phase=False, obsolescence=False
775 ) -> Generator[BaseStoreEntry, None, None]:
807 ) -> Generator[BaseStoreEntry, None, None]:
776 """return files related to data storage (ie: revlogs)
808 """return files related to data storage (ie: revlogs)
777
809
778 yields instance from BaseStoreEntry subclasses
810 yields instance from BaseStoreEntry subclasses
779
811
780 if a matcher is passed, storage files of only those tracked paths
812 if a matcher is passed, storage files of only those tracked paths
781 are passed with matches the matcher
813 are passed with matches the matcher
782 """
814 """
783 # yield data files first
815 # yield data files first
784 for x in self.data_entries(matcher):
816 for x in self.data_entries(matcher):
785 yield x
817 yield x
786 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
818 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
787 yield x
819 yield x
788
820
789 def copylist(self):
821 def copylist(self):
790 return _data
822 return _data
791
823
792 def write(self, tr):
824 def write(self, tr):
793 pass
825 pass
794
826
795 def invalidatecaches(self):
827 def invalidatecaches(self):
796 pass
828 pass
797
829
798 def markremoved(self, fn):
830 def markremoved(self, fn):
799 pass
831 pass
800
832
801 def __contains__(self, path):
833 def __contains__(self, path):
802 '''Checks if the store contains path'''
834 '''Checks if the store contains path'''
803 path = b"/".join((b"data", path))
835 path = b"/".join((b"data", path))
804 # file?
836 # file?
805 if self.vfs.exists(path + b".i"):
837 if self.vfs.exists(path + b".i"):
806 return True
838 return True
807 # dir?
839 # dir?
808 if not path.endswith(b"/"):
840 if not path.endswith(b"/"):
809 path = path + b"/"
841 path = path + b"/"
810 return self.vfs.exists(path)
842 return self.vfs.exists(path)
811
843
812
844
813 class encodedstore(basicstore):
845 class encodedstore(basicstore):
814 def __init__(self, path, vfstype):
846 def __init__(self, path, vfstype):
815 vfs = vfstype(path + b'/store')
847 vfs = vfstype(path + b'/store')
816 self.path = vfs.base
848 self.path = vfs.base
817 self.createmode = _calcmode(vfs)
849 self.createmode = _calcmode(vfs)
818 vfs.createmode = self.createmode
850 vfs.createmode = self.createmode
819 self.rawvfs = vfs
851 self.rawvfs = vfs
820 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
852 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
821 self.opener = self.vfs
853 self.opener = self.vfs
822
854
823 def _walk(self, relpath, recurse, undecodable=None):
855 def _walk(self, relpath, recurse, undecodable=None):
824 old = super()._walk(relpath, recurse)
856 old = super()._walk(relpath, recurse)
825 new = []
857 new = []
826 for f1, value in old:
858 for f1, value in old:
827 try:
859 try:
828 f2 = decodefilename(f1)
860 f2 = decodefilename(f1)
829 except KeyError:
861 except KeyError:
830 if undecodable is None:
862 if undecodable is None:
831 msg = _(b'undecodable revlog name %s') % f1
863 msg = _(b'undecodable revlog name %s') % f1
832 raise error.StorageError(msg)
864 raise error.StorageError(msg)
833 else:
865 else:
834 undecodable.append(f1)
866 undecodable.append(f1)
835 continue
867 continue
836 new.append((f2, value))
868 new.append((f2, value))
837 return new
869 return new
838
870
839 def data_entries(
871 def data_entries(
840 self, matcher=None, undecodable=None
872 self, matcher=None, undecodable=None
841 ) -> Generator[BaseStoreEntry, None, None]:
873 ) -> Generator[BaseStoreEntry, None, None]:
842 entries = super(encodedstore, self).data_entries(
874 entries = super(encodedstore, self).data_entries(
843 undecodable=undecodable
875 undecodable=undecodable
844 )
876 )
845 for entry in entries:
877 for entry in entries:
846 if _match_tracked_entry(entry, matcher):
878 if _match_tracked_entry(entry, matcher):
847 yield entry
879 yield entry
848
880
849 def join(self, f):
881 def join(self, f):
850 return self.path + b'/' + encodefilename(f)
882 return self.path + b'/' + encodefilename(f)
851
883
852 def copylist(self):
884 def copylist(self):
853 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
885 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
854
886
855
887
856 class fncache:
888 class fncache:
857 # the filename used to be partially encoded
889 # the filename used to be partially encoded
858 # hence the encodedir/decodedir dance
890 # hence the encodedir/decodedir dance
859 def __init__(self, vfs):
891 def __init__(self, vfs):
860 self.vfs = vfs
892 self.vfs = vfs
861 self._ignores = set()
893 self._ignores = set()
862 self.entries = None
894 self.entries = None
863 self._dirty = False
895 self._dirty = False
864 # set of new additions to fncache
896 # set of new additions to fncache
865 self.addls = set()
897 self.addls = set()
866
898
867 def ensureloaded(self, warn=None):
899 def ensureloaded(self, warn=None):
868 """read the fncache file if not already read.
900 """read the fncache file if not already read.
869
901
870 If the file on disk is corrupted, raise. If warn is provided,
902 If the file on disk is corrupted, raise. If warn is provided,
871 warn and keep going instead."""
903 warn and keep going instead."""
872 if self.entries is None:
904 if self.entries is None:
873 self._load(warn)
905 self._load(warn)
874
906
875 def _load(self, warn=None):
907 def _load(self, warn=None):
876 '''fill the entries from the fncache file'''
908 '''fill the entries from the fncache file'''
877 self._dirty = False
909 self._dirty = False
878 try:
910 try:
879 fp = self.vfs(b'fncache', mode=b'rb')
911 fp = self.vfs(b'fncache', mode=b'rb')
880 except IOError:
912 except IOError:
881 # skip nonexistent file
913 # skip nonexistent file
882 self.entries = set()
914 self.entries = set()
883 return
915 return
884
916
885 self.entries = set()
917 self.entries = set()
886 chunk = b''
918 chunk = b''
887 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
919 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
888 chunk += c
920 chunk += c
889 try:
921 try:
890 p = chunk.rindex(b'\n')
922 p = chunk.rindex(b'\n')
891 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
923 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
892 chunk = chunk[p + 1 :]
924 chunk = chunk[p + 1 :]
893 except ValueError:
925 except ValueError:
894 # substring '\n' not found, maybe the entry is bigger than the
926 # substring '\n' not found, maybe the entry is bigger than the
895 # chunksize, so let's keep iterating
927 # chunksize, so let's keep iterating
896 pass
928 pass
897
929
898 if chunk:
930 if chunk:
899 msg = _(b"fncache does not ends with a newline")
931 msg = _(b"fncache does not ends with a newline")
900 if warn:
932 if warn:
901 warn(msg + b'\n')
933 warn(msg + b'\n')
902 else:
934 else:
903 raise error.Abort(
935 raise error.Abort(
904 msg,
936 msg,
905 hint=_(
937 hint=_(
906 b"use 'hg debugrebuildfncache' to "
938 b"use 'hg debugrebuildfncache' to "
907 b"rebuild the fncache"
939 b"rebuild the fncache"
908 ),
940 ),
909 )
941 )
910 self._checkentries(fp, warn)
942 self._checkentries(fp, warn)
911 fp.close()
943 fp.close()
912
944
913 def _checkentries(self, fp, warn):
945 def _checkentries(self, fp, warn):
914 """make sure there is no empty string in entries"""
946 """make sure there is no empty string in entries"""
915 if b'' in self.entries:
947 if b'' in self.entries:
916 fp.seek(0)
948 fp.seek(0)
917 for n, line in enumerate(fp):
949 for n, line in enumerate(fp):
918 if not line.rstrip(b'\n'):
950 if not line.rstrip(b'\n'):
919 t = _(b'invalid entry in fncache, line %d') % (n + 1)
951 t = _(b'invalid entry in fncache, line %d') % (n + 1)
920 if warn:
952 if warn:
921 warn(t + b'\n')
953 warn(t + b'\n')
922 else:
954 else:
923 raise error.Abort(t)
955 raise error.Abort(t)
924
956
925 def write(self, tr):
957 def write(self, tr):
926 if self._dirty:
958 if self._dirty:
927 assert self.entries is not None
959 assert self.entries is not None
928 self.entries = self.entries | self.addls
960 self.entries = self.entries | self.addls
929 self.addls = set()
961 self.addls = set()
930 tr.addbackup(b'fncache')
962 tr.addbackup(b'fncache')
931 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
963 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
932 if self.entries:
964 if self.entries:
933 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
965 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
934 fp.close()
966 fp.close()
935 self._dirty = False
967 self._dirty = False
936 if self.addls:
968 if self.addls:
937 # if we have just new entries, let's append them to the fncache
969 # if we have just new entries, let's append them to the fncache
938 tr.addbackup(b'fncache')
970 tr.addbackup(b'fncache')
939 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
971 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
940 if self.addls:
972 if self.addls:
941 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
973 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
942 fp.close()
974 fp.close()
943 self.entries = None
975 self.entries = None
944 self.addls = set()
976 self.addls = set()
945
977
946 def addignore(self, fn):
978 def addignore(self, fn):
947 self._ignores.add(fn)
979 self._ignores.add(fn)
948
980
949 def add(self, fn):
981 def add(self, fn):
950 if fn in self._ignores:
982 if fn in self._ignores:
951 return
983 return
952 if self.entries is None:
984 if self.entries is None:
953 self._load()
985 self._load()
954 if fn not in self.entries:
986 if fn not in self.entries:
955 self.addls.add(fn)
987 self.addls.add(fn)
956
988
957 def remove(self, fn):
989 def remove(self, fn):
958 if self.entries is None:
990 if self.entries is None:
959 self._load()
991 self._load()
960 if fn in self.addls:
992 if fn in self.addls:
961 self.addls.remove(fn)
993 self.addls.remove(fn)
962 return
994 return
963 try:
995 try:
964 self.entries.remove(fn)
996 self.entries.remove(fn)
965 self._dirty = True
997 self._dirty = True
966 except KeyError:
998 except KeyError:
967 pass
999 pass
968
1000
969 def __contains__(self, fn):
1001 def __contains__(self, fn):
970 if fn in self.addls:
1002 if fn in self.addls:
971 return True
1003 return True
972 if self.entries is None:
1004 if self.entries is None:
973 self._load()
1005 self._load()
974 return fn in self.entries
1006 return fn in self.entries
975
1007
976 def __iter__(self):
1008 def __iter__(self):
977 if self.entries is None:
1009 if self.entries is None:
978 self._load()
1010 self._load()
979 return iter(self.entries | self.addls)
1011 return iter(self.entries | self.addls)
980
1012
981
1013
982 class _fncachevfs(vfsmod.proxyvfs):
1014 class _fncachevfs(vfsmod.proxyvfs):
983 def __init__(self, vfs, fnc, encode):
1015 def __init__(self, vfs, fnc, encode):
984 vfsmod.proxyvfs.__init__(self, vfs)
1016 vfsmod.proxyvfs.__init__(self, vfs)
985 self.fncache = fnc
1017 self.fncache = fnc
986 self.encode = encode
1018 self.encode = encode
987
1019
988 def __call__(self, path, mode=b'r', *args, **kw):
1020 def __call__(self, path, mode=b'r', *args, **kw):
989 encoded = self.encode(path)
1021 encoded = self.encode(path)
990 if (
1022 if (
991 mode not in (b'r', b'rb')
1023 mode not in (b'r', b'rb')
992 and (path.startswith(b'data/') or path.startswith(b'meta/'))
1024 and (path.startswith(b'data/') or path.startswith(b'meta/'))
993 and revlog_type(path) is not None
1025 and revlog_type(path) is not None
994 ):
1026 ):
995 # do not trigger a fncache load when adding a file that already is
1027 # do not trigger a fncache load when adding a file that already is
996 # known to exist.
1028 # known to exist.
997 notload = self.fncache.entries is None and self.vfs.exists(encoded)
1029 notload = self.fncache.entries is None and self.vfs.exists(encoded)
998 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
1030 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
999 # when appending to an existing file, if the file has size zero,
1031 # when appending to an existing file, if the file has size zero,
1000 # it should be considered as missing. Such zero-size files are
1032 # it should be considered as missing. Such zero-size files are
1001 # the result of truncation when a transaction is aborted.
1033 # the result of truncation when a transaction is aborted.
1002 notload = False
1034 notload = False
1003 if not notload:
1035 if not notload:
1004 self.fncache.add(path)
1036 self.fncache.add(path)
1005 return self.vfs(encoded, mode, *args, **kw)
1037 return self.vfs(encoded, mode, *args, **kw)
1006
1038
1007 def join(self, path):
1039 def join(self, path):
1008 if path:
1040 if path:
1009 return self.vfs.join(self.encode(path))
1041 return self.vfs.join(self.encode(path))
1010 else:
1042 else:
1011 return self.vfs.join(path)
1043 return self.vfs.join(path)
1012
1044
1013 def register_file(self, path):
1045 def register_file(self, path):
1014 """generic hook point to lets fncache steer its stew"""
1046 """generic hook point to lets fncache steer its stew"""
1015 if path.startswith(b'data/') or path.startswith(b'meta/'):
1047 if path.startswith(b'data/') or path.startswith(b'meta/'):
1016 self.fncache.add(path)
1048 self.fncache.add(path)
1017
1049
1018
1050
1019 class fncachestore(basicstore):
1051 class fncachestore(basicstore):
1020 def __init__(self, path, vfstype, dotencode):
1052 def __init__(self, path, vfstype, dotencode):
1021 if dotencode:
1053 if dotencode:
1022 encode = _pathencode
1054 encode = _pathencode
1023 else:
1055 else:
1024 encode = _plainhybridencode
1056 encode = _plainhybridencode
1025 self.encode = encode
1057 self.encode = encode
1026 vfs = vfstype(path + b'/store')
1058 vfs = vfstype(path + b'/store')
1027 self.path = vfs.base
1059 self.path = vfs.base
1028 self.pathsep = self.path + b'/'
1060 self.pathsep = self.path + b'/'
1029 self.createmode = _calcmode(vfs)
1061 self.createmode = _calcmode(vfs)
1030 vfs.createmode = self.createmode
1062 vfs.createmode = self.createmode
1031 self.rawvfs = vfs
1063 self.rawvfs = vfs
1032 fnc = fncache(vfs)
1064 fnc = fncache(vfs)
1033 self.fncache = fnc
1065 self.fncache = fnc
1034 self.vfs = _fncachevfs(vfs, fnc, encode)
1066 self.vfs = _fncachevfs(vfs, fnc, encode)
1035 self.opener = self.vfs
1067 self.opener = self.vfs
1036
1068
1037 def join(self, f):
1069 def join(self, f):
1038 return self.pathsep + self.encode(f)
1070 return self.pathsep + self.encode(f)
1039
1071
1040 def getsize(self, path):
1072 def getsize(self, path):
1041 return self.rawvfs.stat(path).st_size
1073 return self.rawvfs.stat(path).st_size
1042
1074
1043 def data_entries(
1075 def data_entries(
1044 self, matcher=None, undecodable=None
1076 self, matcher=None, undecodable=None
1045 ) -> Generator[BaseStoreEntry, None, None]:
1077 ) -> Generator[BaseStoreEntry, None, None]:
1046 files = ((f, revlog_type(f)) for f in self.fncache)
1078 files = ((f, revlog_type(f)) for f in self.fncache)
1047 # Note: all files in the fncache should be revlog related. However, the
1079 # Note: all files in the fncache should be revlog related. However, the
1048 # fncache might contain non-revlog files added by previous versions of
1080 # fncache might contain non-revlog files added by previous versions of
1049 # Mercurial.
1081 # Mercurial.
1050 files = (f for f in files if f[1] is not None)
1082 files = (f for f in files if f[1] is not None)
1051 by_revlog = _gather_revlog(files)
1083 by_revlog = _gather_revlog(files)
1052 for revlog, details in by_revlog:
1084 for revlog, details in by_revlog:
1053 file_details = {}
1085 file_details = {}
1054 if revlog.startswith(b'data/'):
1086 if revlog.startswith(b'data/'):
1055 rl_type = FILEFLAGS_FILELOG
1087 rl_type = FILEFLAGS_FILELOG
1056 revlog_target_id = revlog.split(b'/', 1)[1]
1088 revlog_target_id = revlog.split(b'/', 1)[1]
1057 elif revlog.startswith(b'meta/'):
1089 elif revlog.startswith(b'meta/'):
1058 rl_type = FILEFLAGS_MANIFESTLOG
1090 rl_type = FILEFLAGS_MANIFESTLOG
1059 # drop the initial directory and the `00manifest` file part
1091 # drop the initial directory and the `00manifest` file part
1060 tmp = revlog.split(b'/', 1)[1]
1092 tmp = revlog.split(b'/', 1)[1]
1061 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1093 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1062 else:
1094 else:
1063 # unreachable
1095 # unreachable
1064 assert False, revlog
1096 assert False, revlog
1065 for ext, t in details.items():
1097 for ext, t in details.items():
1066 file_details[ext] = {
1098 file_details[ext] = {
1067 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1099 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1068 }
1100 }
1069 entry = RevlogStoreEntry(
1101 entry = RevlogStoreEntry(
1070 path_prefix=revlog,
1102 path_prefix=revlog,
1071 revlog_type=rl_type,
1103 revlog_type=rl_type,
1072 target_id=revlog_target_id,
1104 target_id=revlog_target_id,
1073 details=file_details,
1105 details=file_details,
1074 )
1106 )
1075 if _match_tracked_entry(entry, matcher):
1107 if _match_tracked_entry(entry, matcher):
1076 yield entry
1108 yield entry
1077
1109
1078 def copylist(self):
1110 def copylist(self):
1079 d = (
1111 d = (
1080 b'bookmarks',
1112 b'bookmarks',
1081 b'narrowspec',
1113 b'narrowspec',
1082 b'data',
1114 b'data',
1083 b'meta',
1115 b'meta',
1084 b'dh',
1116 b'dh',
1085 b'fncache',
1117 b'fncache',
1086 b'phaseroots',
1118 b'phaseroots',
1087 b'obsstore',
1119 b'obsstore',
1088 b'00manifest.d',
1120 b'00manifest.d',
1089 b'00manifest.i',
1121 b'00manifest.i',
1090 b'00changelog.d',
1122 b'00changelog.d',
1091 b'00changelog.i',
1123 b'00changelog.i',
1092 b'requires',
1124 b'requires',
1093 )
1125 )
1094 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1126 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1095
1127
1096 def write(self, tr):
1128 def write(self, tr):
1097 self.fncache.write(tr)
1129 self.fncache.write(tr)
1098
1130
1099 def invalidatecaches(self):
1131 def invalidatecaches(self):
1100 self.fncache.entries = None
1132 self.fncache.entries = None
1101 self.fncache.addls = set()
1133 self.fncache.addls = set()
1102
1134
1103 def markremoved(self, fn):
1135 def markremoved(self, fn):
1104 self.fncache.remove(fn)
1136 self.fncache.remove(fn)
1105
1137
1106 def _exists(self, f):
1138 def _exists(self, f):
1107 ef = self.encode(f)
1139 ef = self.encode(f)
1108 try:
1140 try:
1109 self.getsize(ef)
1141 self.getsize(ef)
1110 return True
1142 return True
1111 except FileNotFoundError:
1143 except FileNotFoundError:
1112 return False
1144 return False
1113
1145
1114 def __contains__(self, path):
1146 def __contains__(self, path):
1115 '''Checks if the store contains path'''
1147 '''Checks if the store contains path'''
1116 path = b"/".join((b"data", path))
1148 path = b"/".join((b"data", path))
1117 # check for files (exact match)
1149 # check for files (exact match)
1118 e = path + b'.i'
1150 e = path + b'.i'
1119 if e in self.fncache and self._exists(e):
1151 if e in self.fncache and self._exists(e):
1120 return True
1152 return True
1121 # now check for directories (prefix match)
1153 # now check for directories (prefix match)
1122 if not path.endswith(b'/'):
1154 if not path.endswith(b'/'):
1123 path += b'/'
1155 path += b'/'
1124 for e in self.fncache:
1156 for e in self.fncache:
1125 if e.startswith(path) and self._exists(e):
1157 if e.startswith(path) and self._exists(e):
1126 return True
1158 return True
1127 return False
1159 return False
@@ -1,978 +1,966 b''
1 # streamclone.py - producing and consuming streaming repository data
1 # streamclone.py - producing and consuming streaming repository data
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import contextlib
9 import contextlib
10 import os
10 import os
11 import struct
11 import struct
12
12
13 from .i18n import _
13 from .i18n import _
14 from .pycompat import open
15 from .interfaces import repository
14 from .interfaces import repository
16 from . import (
15 from . import (
17 bookmarks,
16 bookmarks,
18 bundle2 as bundle2mod,
17 bundle2 as bundle2mod,
19 cacheutil,
18 cacheutil,
20 error,
19 error,
21 narrowspec,
20 narrowspec,
22 phases,
21 phases,
23 pycompat,
22 pycompat,
24 requirements as requirementsmod,
23 requirements as requirementsmod,
25 scmutil,
24 scmutil,
26 store,
25 store,
27 transaction,
26 transaction,
28 util,
27 util,
29 )
28 )
30 from .revlogutils import (
29 from .revlogutils import (
31 nodemap,
30 nodemap,
32 )
31 )
33
32
34
33
35 def new_stream_clone_requirements(default_requirements, streamed_requirements):
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
36 """determine the final set of requirement for a new stream clone
35 """determine the final set of requirement for a new stream clone
37
36
38 this method combine the "default" requirements that a new repository would
37 this method combine the "default" requirements that a new repository would
39 use with the constaint we get from the stream clone content. We keep local
38 use with the constaint we get from the stream clone content. We keep local
40 configuration choice when possible.
39 configuration choice when possible.
41 """
40 """
42 requirements = set(default_requirements)
41 requirements = set(default_requirements)
43 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
44 requirements.update(streamed_requirements)
43 requirements.update(streamed_requirements)
45 return requirements
44 return requirements
46
45
47
46
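# Editor's note: a self-contained sketch (not part of Mercurial) of the set
# arithmetic performed by new_stream_clone_requirements() above.  All
# requirement names below are illustrative stand-ins.
def _combine_requirements_example():
    stream_fixed = {b'revlogv1', b'sparserevlog'}      # stand-in for STREAM_FIXED_REQUIREMENTS
    default = {b'revlogv1', b'dotencode', b'fncache'}   # what a fresh local repo would pick
    streamed = {b'revlogv1', b'sparserevlog'}           # what the streamed content dictates
    combined = set(default)
    combined -= stream_fixed  # drop local choices that the stream overrides
    combined |= streamed      # adopt the stream's fixed requirements as-is
    assert combined == {b'dotencode', b'fncache', b'revlogv1', b'sparserevlog'}
    return combined

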
48 def streamed_requirements(repo):
47 def streamed_requirements(repo):
49 """the set of requirement the new clone will have to support
48 """the set of requirement the new clone will have to support
50
49
51 This is used for advertising the stream options and to generate the actual
50 This is used for advertising the stream options and to generate the actual
52 stream content."""
51 stream content."""
53 requiredformats = (
52 requiredformats = (
54 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
55 )
54 )
56 return requiredformats
55 return requiredformats
57
56
58
57
59 def canperformstreamclone(pullop, bundle2=False):
58 def canperformstreamclone(pullop, bundle2=False):
60 """Whether it is possible to perform a streaming clone as part of pull.
59 """Whether it is possible to perform a streaming clone as part of pull.
61
60
62 ``bundle2`` will cause the function to consider stream clone through
61 ``bundle2`` will cause the function to consider stream clone through
63 bundle2 and only through bundle2.
62 bundle2 and only through bundle2.
64
63
65 Returns a tuple of (supported, requirements). ``supported`` is True if
64 Returns a tuple of (supported, requirements). ``supported`` is True if
66 streaming clone is supported and False otherwise. ``requirements`` is
65 streaming clone is supported and False otherwise. ``requirements`` is
67 a set of repo requirements from the remote, or ``None`` if stream clone
66 a set of repo requirements from the remote, or ``None`` if stream clone
68 isn't supported.
67 isn't supported.
69 """
68 """
70 repo = pullop.repo
69 repo = pullop.repo
71 remote = pullop.remote
70 remote = pullop.remote
72
71
73 # should we consider streaming clone at all ?
72 # should we consider streaming clone at all ?
74 streamrequested = pullop.streamclonerequested
73 streamrequested = pullop.streamclonerequested
75 # If we don't have a preference, let the server decide for us. This
74 # If we don't have a preference, let the server decide for us. This
76 # likely only comes into play in LANs.
75 # likely only comes into play in LANs.
77 if streamrequested is None:
76 if streamrequested is None:
78 # The server can advertise whether to prefer streaming clone.
77 # The server can advertise whether to prefer streaming clone.
79 streamrequested = remote.capable(b'stream-preferred')
78 streamrequested = remote.capable(b'stream-preferred')
80 if not streamrequested:
79 if not streamrequested:
81 return False, None
80 return False, None
82
81
83 # Streaming clone only works on an empty destination repository
82 # Streaming clone only works on an empty destination repository
84 if len(repo):
83 if len(repo):
85 return False, None
84 return False, None
86
85
87 # Streaming clone only works if all data is being requested.
86 # Streaming clone only works if all data is being requested.
88 if pullop.heads:
87 if pullop.heads:
89 return False, None
88 return False, None
90
89
91 bundle2supported = False
90 bundle2supported = False
92 if pullop.canusebundle2:
91 if pullop.canusebundle2:
93 local_caps = bundle2mod.getrepocaps(repo, role=b'client')
92 local_caps = bundle2mod.getrepocaps(repo, role=b'client')
94 local_supported = set(local_caps.get(b'stream', []))
93 local_supported = set(local_caps.get(b'stream', []))
95 remote_supported = set(pullop.remotebundle2caps.get(b'stream', []))
94 remote_supported = set(pullop.remotebundle2caps.get(b'stream', []))
96 bundle2supported = bool(local_supported & remote_supported)
95 bundle2supported = bool(local_supported & remote_supported)
97 # else
96 # else
98 # Server doesn't support bundle2 stream clone or doesn't support
97 # Server doesn't support bundle2 stream clone or doesn't support
99 # the versions we support. Fall back and possibly allow legacy.
98 # the versions we support. Fall back and possibly allow legacy.
100
99
101 # Ensures legacy code path uses available bundle2.
100 # Ensures legacy code path uses available bundle2.
102 if bundle2supported and not bundle2:
101 if bundle2supported and not bundle2:
103 return False, None
102 return False, None
104 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
103 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
105 elif bundle2 and not bundle2supported:
104 elif bundle2 and not bundle2supported:
106 return False, None
105 return False, None
107
106
108 # In order for stream clone to work, the client has to support all the
107 # In order for stream clone to work, the client has to support all the
109 # requirements advertised by the server.
108 # requirements advertised by the server.
110 #
109 #
111 # The server advertises its requirements via the "stream" and "streamreqs"
110 # The server advertises its requirements via the "stream" and "streamreqs"
112 # capability. "stream" (a value-less capability) is advertised if and only
111 # capability. "stream" (a value-less capability) is advertised if and only
113 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
114 # is advertised and contains a comma-delimited list of requirements.
113 # is advertised and contains a comma-delimited list of requirements.
115 requirements = set()
114 requirements = set()
116 if remote.capable(b'stream'):
115 if remote.capable(b'stream'):
117 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
118 else:
117 else:
119 streamreqs = remote.capable(b'streamreqs')
118 streamreqs = remote.capable(b'streamreqs')
120 # This is weird and shouldn't happen with modern servers.
119 # This is weird and shouldn't happen with modern servers.
121 if not streamreqs:
120 if not streamreqs:
122 pullop.repo.ui.warn(
121 pullop.repo.ui.warn(
123 _(
122 _(
124 b'warning: stream clone requested but server has them '
123 b'warning: stream clone requested but server has them '
125 b'disabled\n'
124 b'disabled\n'
126 )
125 )
127 )
126 )
128 return False, None
127 return False, None
129
128
130 streamreqs = set(streamreqs.split(b','))
129 streamreqs = set(streamreqs.split(b','))
131 # Server requires something we don't support. Bail.
130 # Server requires something we don't support. Bail.
132 missingreqs = streamreqs - repo.supported
131 missingreqs = streamreqs - repo.supported
133 if missingreqs:
132 if missingreqs:
134 pullop.repo.ui.warn(
133 pullop.repo.ui.warn(
135 _(
134 _(
136 b'warning: stream clone requested but client is missing '
135 b'warning: stream clone requested but client is missing '
137 b'requirements: %s\n'
136 b'requirements: %s\n'
138 )
137 )
139 % b', '.join(sorted(missingreqs))
138 % b', '.join(sorted(missingreqs))
140 )
139 )
141 pullop.repo.ui.warn(
140 pullop.repo.ui.warn(
142 _(
141 _(
143 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
144 b'for more information)\n'
143 b'for more information)\n'
145 )
144 )
146 )
145 )
147 return False, None
146 return False, None
148 requirements = streamreqs
147 requirements = streamreqs
149
148
150 return True, requirements
149 return True, requirements
151
150
152
151
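# Editor's note: an illustrative sketch (not Mercurial code) of how the two
# capability forms described in canperformstreamclone() above map to a
# requirement set on the client side.  The capability values are hypothetical.
def _requirements_from_caps_example(caps):
    """`caps` maps capability name to value (b'' for value-less capabilities)."""
    if b'stream' in caps:
        # a value-less "stream" capability means the only requirement is revlogv1
        return {b'revlogv1'}
    streamreqs = caps.get(b'streamreqs')
    if not streamreqs:
        return None  # server has stream clones disabled
    return set(streamreqs.split(b','))

# e.g. _requirements_from_caps_example({b'streamreqs': b'generaldelta,revlogv1'})
# would return {b'generaldelta', b'revlogv1'}

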
153 def maybeperformlegacystreamclone(pullop):
152 def maybeperformlegacystreamclone(pullop):
154 """Possibly perform a legacy stream clone operation.
153 """Possibly perform a legacy stream clone operation.
155
154
156 Legacy stream clones are performed as part of pull but before all other
155 Legacy stream clones are performed as part of pull but before all other
157 operations.
156 operations.
158
157
159 A legacy stream clone will not be performed if a bundle2 stream clone is
158 A legacy stream clone will not be performed if a bundle2 stream clone is
160 supported.
159 supported.
161 """
160 """
162 from . import localrepo
161 from . import localrepo
163
162
164 supported, requirements = canperformstreamclone(pullop)
163 supported, requirements = canperformstreamclone(pullop)
165
164
166 if not supported:
165 if not supported:
167 return
166 return
168
167
169 repo = pullop.repo
168 repo = pullop.repo
170 remote = pullop.remote
169 remote = pullop.remote
171
170
172 # Save remote branchmap. We will use it later to speed up branchcache
171 # Save remote branchmap. We will use it later to speed up branchcache
173 # creation.
172 # creation.
174 rbranchmap = None
173 rbranchmap = None
175 if remote.capable(b'branchmap'):
174 if remote.capable(b'branchmap'):
176 with remote.commandexecutor() as e:
175 with remote.commandexecutor() as e:
177 rbranchmap = e.callcommand(b'branchmap', {}).result()
176 rbranchmap = e.callcommand(b'branchmap', {}).result()
178
177
179 repo.ui.status(_(b'streaming all changes\n'))
178 repo.ui.status(_(b'streaming all changes\n'))
180
179
181 with remote.commandexecutor() as e:
180 with remote.commandexecutor() as e:
182 fp = e.callcommand(b'stream_out', {}).result()
181 fp = e.callcommand(b'stream_out', {}).result()
183
182
184 # TODO strictly speaking, this code should all be inside the context
183 # TODO strictly speaking, this code should all be inside the context
185 # manager because the context manager is supposed to ensure all wire state
184 # manager because the context manager is supposed to ensure all wire state
186 # is flushed when exiting. But the legacy peers don't do this, so it
185 # is flushed when exiting. But the legacy peers don't do this, so it
187 # doesn't matter.
186 # doesn't matter.
188 l = fp.readline()
187 l = fp.readline()
189 try:
188 try:
190 resp = int(l)
189 resp = int(l)
191 except ValueError:
190 except ValueError:
192 raise error.ResponseError(
191 raise error.ResponseError(
193 _(b'unexpected response from remote server:'), l
192 _(b'unexpected response from remote server:'), l
194 )
193 )
195 if resp == 1:
194 if resp == 1:
196 raise error.Abort(_(b'operation forbidden by server'))
195 raise error.Abort(_(b'operation forbidden by server'))
197 elif resp == 2:
196 elif resp == 2:
198 raise error.Abort(_(b'locking the remote repository failed'))
197 raise error.Abort(_(b'locking the remote repository failed'))
199 elif resp != 0:
198 elif resp != 0:
200 raise error.Abort(_(b'the server sent an unknown error code'))
199 raise error.Abort(_(b'the server sent an unknown error code'))
201
200
202 l = fp.readline()
201 l = fp.readline()
203 try:
202 try:
204 filecount, bytecount = map(int, l.split(b' ', 1))
203 filecount, bytecount = map(int, l.split(b' ', 1))
205 except (ValueError, TypeError):
204 except (ValueError, TypeError):
206 raise error.ResponseError(
205 raise error.ResponseError(
207 _(b'unexpected response from remote server:'), l
206 _(b'unexpected response from remote server:'), l
208 )
207 )
209
208
210 with repo.lock():
209 with repo.lock():
211 consumev1(repo, fp, filecount, bytecount)
210 consumev1(repo, fp, filecount, bytecount)
212 repo.requirements = new_stream_clone_requirements(
211 repo.requirements = new_stream_clone_requirements(
213 repo.requirements,
212 repo.requirements,
214 requirements,
213 requirements,
215 )
214 )
216 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 repo.svfs.options = localrepo.resolvestorevfsoptions(
217 repo.ui, repo.requirements, repo.features
216 repo.ui, repo.requirements, repo.features
218 )
217 )
219 scmutil.writereporequirements(repo)
218 scmutil.writereporequirements(repo)
220 nodemap.post_stream_cleanup(repo)
219 nodemap.post_stream_cleanup(repo)
221
220
222 if rbranchmap:
221 if rbranchmap:
223 repo._branchcaches.replace(repo, rbranchmap)
222 repo._branchcaches.replace(repo, rbranchmap)
224
223
225 repo.invalidate()
224 repo.invalidate()
226
225
227
226
228 def allowservergeneration(repo):
227 def allowservergeneration(repo):
229 """Whether streaming clones are allowed from the server."""
228 """Whether streaming clones are allowed from the server."""
230 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
231 return False
230 return False
232
231
233 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
234 return False
233 return False
235
234
236 # The way stream clone works makes it impossible to hide secret changesets.
235 # The way stream clone works makes it impossible to hide secret changesets.
237 # So don't allow this by default.
236 # So don't allow this by default.
238 secret = phases.hassecret(repo)
237 secret = phases.hassecret(repo)
239 if secret:
238 if secret:
240 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
241
240
242 return True
241 return True
243
242
244
243
245 # This is its own function so extensions can override it.
244 # This is its own function so extensions can override it.
246 def _walkstreamfiles(repo, matcher=None, phase=False, obsolescence=False):
245 def _walkstreamfiles(repo, matcher=None, phase=False, obsolescence=False):
247 return repo.store.walk(matcher, phase=phase, obsolescence=obsolescence)
246 return repo.store.walk(matcher, phase=phase, obsolescence=obsolescence)
248
247
249
248
250 def generatev1(repo):
249 def generatev1(repo):
251 """Emit content for version 1 of a streaming clone.
250 """Emit content for version 1 of a streaming clone.
252
251
253 This returns a 3-tuple of (file count, byte size, data iterator).
252 This returns a 3-tuple of (file count, byte size, data iterator).
254
253
255 The data iterator consists of one entry for each file being transferred.
254 The data iterator consists of one entry for each file being transferred.
256 Each file entry starts as a line with the file name and integer size
255 Each file entry starts as a line with the file name and integer size
257 delimited by a null byte.
256 delimited by a null byte.
258
257
259 The raw file data follows. Following the raw file data is the next file
258 The raw file data follows. Following the raw file data is the next file
260 entry, or EOF.
259 entry, or EOF.
261
260
262 When used on the wire protocol, an additional line indicating protocol
261 When used on the wire protocol, an additional line indicating protocol
263 success will be prepended to the stream. This function is not responsible
262 success will be prepended to the stream. This function is not responsible
264 for adding it.
263 for adding it.
265
264
266 This function will obtain a repository lock to ensure a consistent view of
265 This function will obtain a repository lock to ensure a consistent view of
267 the store is captured. It therefore may raise LockError.
266 the store is captured. It therefore may raise LockError.
268 """
267 """
269 entries = []
268 entries = []
270 total_bytes = 0
269 total_bytes = 0
271 # Get consistent snapshot of repo, lock during scan.
270 # Get consistent snapshot of repo, lock during scan.
272 with repo.lock():
271 with repo.lock():
273 repo.ui.debug(b'scanning\n')
272 repo.ui.debug(b'scanning\n')
274 for entry in _walkstreamfiles(repo):
273 for entry in _walkstreamfiles(repo):
275 for f in entry.files():
274 for f in entry.files():
276 file_size = f.file_size(repo.store.vfs)
275 file_size = f.file_size(repo.store.vfs)
277 if file_size:
276 if file_size:
278 entries.append((f.unencoded_path, file_size))
277 entries.append((f.unencoded_path, file_size))
279 total_bytes += file_size
278 total_bytes += file_size
280 _test_sync_point_walk_1(repo)
279 _test_sync_point_walk_1(repo)
281 _test_sync_point_walk_2(repo)
280 _test_sync_point_walk_2(repo)
282
281
283 repo.ui.debug(
282 repo.ui.debug(
284 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
283 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
285 )
284 )
286
285
287 svfs = repo.svfs
286 svfs = repo.svfs
288 debugflag = repo.ui.debugflag
287 debugflag = repo.ui.debugflag
289
288
290 def emitrevlogdata():
289 def emitrevlogdata():
291 for name, size in entries:
290 for name, size in entries:
292 if debugflag:
291 if debugflag:
293 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
292 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
294 # partially encode name over the wire for backwards compat
293 # partially encode name over the wire for backwards compat
295 yield b'%s\0%d\n' % (store.encodedir(name), size)
294 yield b'%s\0%d\n' % (store.encodedir(name), size)
296 # auditing at this stage is both pointless (paths are already
295 # auditing at this stage is both pointless (paths are already
297 # trusted by the local repo) and expensive
296 # trusted by the local repo) and expensive
298 with svfs(name, b'rb', auditpath=False) as fp:
297 with svfs(name, b'rb', auditpath=False) as fp:
299 if size <= 65536:
298 if size <= 65536:
300 yield fp.read(size)
299 yield fp.read(size)
301 else:
300 else:
302 for chunk in util.filechunkiter(fp, limit=size):
301 for chunk in util.filechunkiter(fp, limit=size):
303 yield chunk
302 yield chunk
304
303
305 return len(entries), total_bytes, emitrevlogdata()
304 return len(entries), total_bytes, emitrevlogdata()
306
305
307
306
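# Editor's note: a minimal reader for the per-file framing documented in the
# generatev1() docstring above ("<name>\0<size>\n" followed by <size> raw
# bytes).  This is only a sketch for illustration; consumev1() below is the
# real consumer (and note that names on the wire are store.encodedir()-encoded).
def _iter_v1_entries_example(fp):
    while True:
        header = fp.readline()
        if not header:
            break  # EOF: no more file entries
        name, size = header.rstrip(b'\n').split(b'\0', 1)
        # the real code streams the data in chunks; reading it whole keeps the sketch short
        yield name, fp.read(int(size))

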
308 def generatev1wireproto(repo):
307 def generatev1wireproto(repo):
309 """Emit content for version 1 of streaming clone suitable for the wire.
308 """Emit content for version 1 of streaming clone suitable for the wire.
310
309
311 This is the data output from ``generatev1()`` with 2 header lines. The
310 This is the data output from ``generatev1()`` with 2 header lines. The
312 first line indicates overall success. The 2nd contains the file count and
311 first line indicates overall success. The 2nd contains the file count and
313 byte size of payload.
312 byte size of payload.
314
313
315 The success line contains "0" for success, "1" for stream generation not
314 The success line contains "0" for success, "1" for stream generation not
316 allowed, and "2" for error locking the repository (possibly indicating
315 allowed, and "2" for error locking the repository (possibly indicating
317 a permissions error for the server process).
316 a permissions error for the server process).
318 """
317 """
319 if not allowservergeneration(repo):
318 if not allowservergeneration(repo):
320 yield b'1\n'
319 yield b'1\n'
321 return
320 return
322
321
323 try:
322 try:
324 filecount, bytecount, it = generatev1(repo)
323 filecount, bytecount, it = generatev1(repo)
325 except error.LockError:
324 except error.LockError:
326 yield b'2\n'
325 yield b'2\n'
327 return
326 return
328
327
329 # Indicates successful response.
328 # Indicates successful response.
330 yield b'0\n'
329 yield b'0\n'
331 yield b'%d %d\n' % (filecount, bytecount)
330 yield b'%d %d\n' % (filecount, bytecount)
332 for chunk in it:
331 for chunk in it:
333 yield chunk
332 yield chunk
334
333
335
334
336 def generatebundlev1(repo, compression=b'UN'):
335 def generatebundlev1(repo, compression=b'UN'):
337 """Emit content for version 1 of a stream clone bundle.
336 """Emit content for version 1 of a stream clone bundle.
338
337
339 The first 4 bytes of the output ("HGS1") denote this as stream clone
338 The first 4 bytes of the output ("HGS1") denote this as stream clone
340 bundle version 1.
339 bundle version 1.
341
340
342 The next 2 bytes indicate the compression type. Only "UN" is currently
341 The next 2 bytes indicate the compression type. Only "UN" is currently
343 supported.
342 supported.
344
343
345 The next 16 bytes are two 64-bit big endian unsigned integers indicating
344 The next 16 bytes are two 64-bit big endian unsigned integers indicating
346 file count and byte count, respectively.
345 file count and byte count, respectively.
347
346
348 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
347 The next 2 bytes are a 16-bit big endian unsigned short declaring the length
349 of the requirements string, including a trailing \0. The following N bytes
348 of the requirements string, including a trailing \0. The following N bytes
350 are the requirements string, which is ASCII containing a comma-delimited
349 are the requirements string, which is ASCII containing a comma-delimited
351 list of repo requirements that are needed to support the data.
350 list of repo requirements that are needed to support the data.
352
351
353 The remaining content is the output of ``generatev1()`` (which may be
352 The remaining content is the output of ``generatev1()`` (which may be
354 compressed in the future).
353 compressed in the future).
355
354
356 Returns a tuple of (requirements, data generator).
355 Returns a tuple of (requirements, data generator).
357 """
356 """
358 if compression != b'UN':
357 if compression != b'UN':
359 raise ValueError(b'we do not support the compression argument yet')
358 raise ValueError(b'we do not support the compression argument yet')
360
359
361 requirements = streamed_requirements(repo)
360 requirements = streamed_requirements(repo)
362 requires = b','.join(sorted(requirements))
361 requires = b','.join(sorted(requirements))
363
362
364 def gen():
363 def gen():
365 yield b'HGS1'
364 yield b'HGS1'
366 yield compression
365 yield compression
367
366
368 filecount, bytecount, it = generatev1(repo)
367 filecount, bytecount, it = generatev1(repo)
369 repo.ui.status(
368 repo.ui.status(
370 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
369 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
371 )
370 )
372
371
373 yield struct.pack(b'>QQ', filecount, bytecount)
372 yield struct.pack(b'>QQ', filecount, bytecount)
374 yield struct.pack(b'>H', len(requires) + 1)
373 yield struct.pack(b'>H', len(requires) + 1)
375 yield requires + b'\0'
374 yield requires + b'\0'
376
375
377 # This is where we'll add compression in the future.
376 # This is where we'll add compression in the future.
378 assert compression == b'UN'
377 assert compression == b'UN'
379
378
380 progress = repo.ui.makeprogress(
379 progress = repo.ui.makeprogress(
381 _(b'bundle'), total=bytecount, unit=_(b'bytes')
380 _(b'bundle'), total=bytecount, unit=_(b'bytes')
382 )
381 )
383 progress.update(0)
382 progress.update(0)
384
383
385 for chunk in it:
384 for chunk in it:
386 progress.increment(step=len(chunk))
385 progress.increment(step=len(chunk))
387 yield chunk
386 yield chunk
388
387
389 progress.complete()
388 progress.complete()
390
389
391 return requirements, gen()
390 return requirements, gen()
392
391
393
392
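# Editor's note: a sketch of just the bundle header layout documented in the
# generatebundlev1() docstring above; readbundle1header() further down is the
# real parser.  It relies on the module-level `import struct`, and the counts
# passed in are whatever the caller computed.
def _build_v1_bundle_header_example(filecount, bytecount, requirements):
    requires = b','.join(sorted(requirements)) + b'\0'
    header = b'HGS1'                                     # magic: stream clone bundle v1
    header += b'UN'                                      # compression: only "UN" is supported
    header += struct.pack(b'>QQ', filecount, bytecount)  # 64-bit big endian counts
    header += struct.pack(b'>H', len(requires))          # 16-bit length, incl. trailing NUL
    return header + requires

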
394 def consumev1(repo, fp, filecount, bytecount):
393 def consumev1(repo, fp, filecount, bytecount):
395 """Apply the contents from version 1 of a streaming clone file handle.
394 """Apply the contents from version 1 of a streaming clone file handle.
396
395
397 This takes the output from "stream_out" and applies it to the specified
396 This takes the output from "stream_out" and applies it to the specified
398 repository.
397 repository.
399
398
400 Like "stream_out," the status line added by the wire protocol is not
399 Like "stream_out," the status line added by the wire protocol is not
401 handled by this function.
400 handled by this function.
402 """
401 """
403 with repo.lock():
402 with repo.lock():
404 repo.ui.status(
403 repo.ui.status(
405 _(b'%d files to transfer, %s of data\n')
404 _(b'%d files to transfer, %s of data\n')
406 % (filecount, util.bytecount(bytecount))
405 % (filecount, util.bytecount(bytecount))
407 )
406 )
408 progress = repo.ui.makeprogress(
407 progress = repo.ui.makeprogress(
409 _(b'clone'), total=bytecount, unit=_(b'bytes')
408 _(b'clone'), total=bytecount, unit=_(b'bytes')
410 )
409 )
411 progress.update(0)
410 progress.update(0)
412 start = util.timer()
411 start = util.timer()
413
412
414 # TODO: get rid of (potential) inconsistency
413 # TODO: get rid of (potential) inconsistency
415 #
414 #
416 # If transaction is started and any @filecache property is
415 # If transaction is started and any @filecache property is
417 # changed at this point, it causes inconsistency between
416 # changed at this point, it causes inconsistency between
418 # in-memory cached property and streamclone-ed file on the
417 # in-memory cached property and streamclone-ed file on the
419 # disk. Nested transaction prevents transaction scope "clone"
418 # disk. Nested transaction prevents transaction scope "clone"
420 # below from writing in-memory changes out at the end of it,
419 # below from writing in-memory changes out at the end of it,
421 # even though in-memory changes are discarded at the end of it
420 # even though in-memory changes are discarded at the end of it
422 # regardless of transaction nesting.
421 # regardless of transaction nesting.
423 #
422 #
424 # But transaction nesting can't be simply prohibited, because
423 # But transaction nesting can't be simply prohibited, because
425 # nesting occurs also in ordinary case (e.g. enabling
424 # nesting occurs also in ordinary case (e.g. enabling
426 # clonebundles).
425 # clonebundles).
427
426
428 with repo.transaction(b'clone'):
427 with repo.transaction(b'clone'):
429 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
428 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
430 for i in range(filecount):
429 for i in range(filecount):
431 # XXX doesn't support '\n' or '\r' in filenames
430 # XXX doesn't support '\n' or '\r' in filenames
432 l = fp.readline()
431 l = fp.readline()
433 try:
432 try:
434 name, size = l.split(b'\0', 1)
433 name, size = l.split(b'\0', 1)
435 size = int(size)
434 size = int(size)
436 except (ValueError, TypeError):
435 except (ValueError, TypeError):
437 raise error.ResponseError(
436 raise error.ResponseError(
438 _(b'unexpected response from remote server:'), l
437 _(b'unexpected response from remote server:'), l
439 )
438 )
440 if repo.ui.debugflag:
439 if repo.ui.debugflag:
441 repo.ui.debug(
440 repo.ui.debug(
442 b'adding %s (%s)\n' % (name, util.bytecount(size))
441 b'adding %s (%s)\n' % (name, util.bytecount(size))
443 )
442 )
444 # for backwards compat, name was partially encoded
443 # for backwards compat, name was partially encoded
445 path = store.decodedir(name)
444 path = store.decodedir(name)
446 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
445 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
447 for chunk in util.filechunkiter(fp, limit=size):
446 for chunk in util.filechunkiter(fp, limit=size):
448 progress.increment(step=len(chunk))
447 progress.increment(step=len(chunk))
449 ofp.write(chunk)
448 ofp.write(chunk)
450
449
451 # force @filecache properties to be reloaded from
450 # force @filecache properties to be reloaded from
452 # streamclone-ed file at next access
451 # streamclone-ed file at next access
453 repo.invalidate(clearfilecache=True)
452 repo.invalidate(clearfilecache=True)
454
453
455 elapsed = util.timer() - start
454 elapsed = util.timer() - start
456 if elapsed <= 0:
455 if elapsed <= 0:
457 elapsed = 0.001
456 elapsed = 0.001
458 progress.complete()
457 progress.complete()
459 repo.ui.status(
458 repo.ui.status(
460 _(b'transferred %s in %.1f seconds (%s/sec)\n')
459 _(b'transferred %s in %.1f seconds (%s/sec)\n')
461 % (
460 % (
462 util.bytecount(bytecount),
461 util.bytecount(bytecount),
463 elapsed,
462 elapsed,
464 util.bytecount(bytecount / elapsed),
463 util.bytecount(bytecount / elapsed),
465 )
464 )
466 )
465 )
467
466
468
467
469 def readbundle1header(fp):
468 def readbundle1header(fp):
470 compression = fp.read(2)
469 compression = fp.read(2)
471 if compression != b'UN':
470 if compression != b'UN':
472 raise error.Abort(
471 raise error.Abort(
473 _(
472 _(
474 b'only uncompressed stream clone bundles are '
473 b'only uncompressed stream clone bundles are '
475 b'supported; got %s'
474 b'supported; got %s'
476 )
475 )
477 % compression
476 % compression
478 )
477 )
479
478
480 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
479 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
481 requireslen = struct.unpack(b'>H', fp.read(2))[0]
480 requireslen = struct.unpack(b'>H', fp.read(2))[0]
482 requires = fp.read(requireslen)
481 requires = fp.read(requireslen)
483
482
484 if not requires.endswith(b'\0'):
483 if not requires.endswith(b'\0'):
485 raise error.Abort(
484 raise error.Abort(
486 _(
485 _(
487 b'malformed stream clone bundle: '
486 b'malformed stream clone bundle: '
488 b'requirements not properly encoded'
487 b'requirements not properly encoded'
489 )
488 )
490 )
489 )
491
490
492 requirements = set(requires.rstrip(b'\0').split(b','))
491 requirements = set(requires.rstrip(b'\0').split(b','))
493
492
494 return filecount, bytecount, requirements
493 return filecount, bytecount, requirements
495
494
496
495
497 def applybundlev1(repo, fp):
496 def applybundlev1(repo, fp):
498 """Apply the content from a stream clone bundle version 1.
497 """Apply the content from a stream clone bundle version 1.
499
498
500 We assume the 4 byte header has been read and validated and the file handle
499 We assume the 4 byte header has been read and validated and the file handle
501 is at the 2 byte compression identifier.
500 is at the 2 byte compression identifier.
502 """
501 """
503 if len(repo):
502 if len(repo):
504 raise error.Abort(
503 raise error.Abort(
505 _(b'cannot apply stream clone bundle on non-empty repo')
504 _(b'cannot apply stream clone bundle on non-empty repo')
506 )
505 )
507
506
508 filecount, bytecount, requirements = readbundle1header(fp)
507 filecount, bytecount, requirements = readbundle1header(fp)
509 missingreqs = requirements - repo.supported
508 missingreqs = requirements - repo.supported
510 if missingreqs:
509 if missingreqs:
511 raise error.Abort(
510 raise error.Abort(
512 _(b'unable to apply stream clone: unsupported format: %s')
511 _(b'unable to apply stream clone: unsupported format: %s')
513 % b', '.join(sorted(missingreqs))
512 % b', '.join(sorted(missingreqs))
514 )
513 )
515
514
516 consumev1(repo, fp, filecount, bytecount)
515 consumev1(repo, fp, filecount, bytecount)
517 nodemap.post_stream_cleanup(repo)
516 nodemap.post_stream_cleanup(repo)
518
517
519
518
520 class streamcloneapplier:
519 class streamcloneapplier:
521 """Class to manage applying streaming clone bundles.
520 """Class to manage applying streaming clone bundles.
522
521
523 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
522 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
524 readers to perform bundle type-specific functionality.
523 readers to perform bundle type-specific functionality.
525 """
524 """
526
525
527 def __init__(self, fh):
526 def __init__(self, fh):
528 self._fh = fh
527 self._fh = fh
529
528
530 def apply(self, repo):
529 def apply(self, repo):
531 return applybundlev1(repo, self._fh)
530 return applybundlev1(repo, self._fh)
532
531
533
532
534 # type of file to stream
533 # type of file to stream
535 _fileappend = 0 # append only file
534 _fileappend = 0 # append only file
536 _filefull = 1 # full snapshot file
535 _filefull = 1 # full snapshot file
537
536
538 # Source of the file
537 # Source of the file
539 _srcstore = b's' # store (svfs)
538 _srcstore = b's' # store (svfs)
540 _srccache = b'c' # cache (cache)
539 _srccache = b'c' # cache (cache)
541
540
542 # This is its own function so extensions can override it.
541 # This is its own function so extensions can override it.
543 def _walkstreamfullstorefiles(repo):
542 def _walkstreamfullstorefiles(repo):
544 """list snapshot file from the store"""
543 """list snapshot file from the store"""
545 fnames = []
544 fnames = []
546 if not repo.publishing():
545 if not repo.publishing():
547 fnames.append(b'phaseroots')
546 fnames.append(b'phaseroots')
548 return fnames
547 return fnames
549
548
550
549
551 def _filterfull(entry, copy, vfsmap):
550 def _filterfull(entry, copy, vfsmap):
552 """actually copy the snapshot files"""
551 """actually copy the snapshot files"""
553 src, name, ftype, data = entry
552 src, name, ftype, data = entry
554 if ftype != _filefull:
553 if ftype != _filefull:
555 return entry
554 return entry
556 return (src, name, ftype, copy(vfsmap[src].join(name)))
555 return (src, name, ftype, copy(vfsmap[src].join(name)))
557
556
558
557
559 class TempCopyManager:
558 class TempCopyManager:
560 """Manage temporary backup of volatile file during stream clone
559 """Manage temporary backup of volatile file during stream clone
561
560
562 This should be used as a Python context, the copies will be discarded when
561 This should be used as a Python context, the copies will be discarded when
563 exiting the context.
562 exiting the context.
564
563
565 A copy can be done by calling the object on the real path (encoded full
564 A copy can be done by calling the object on the real path (encoded full
566 path)
565 path)
567
566
568 The backup path can be retrieved using the __getitem__ protocol, obj[path].
567 The backup path can be retrieved using the __getitem__ protocol, obj[path].
569 For a file without a backup, it will return the path unmodified (equivalent to
568 For a file without a backup, it will return the path unmodified (equivalent to
570 `dict.get(x, x)`).
569 `dict.get(x, x)`).
571 """
570 """
572
571
573 def __init__(self):
572 def __init__(self):
574 self._copies = None
573 self._copies = None
575 self._dst_dir = None
574 self._dst_dir = None
576
575
577 def __enter__(self):
576 def __enter__(self):
578 if self._copies is not None:
577 if self._copies is not None:
579 msg = "Copies context already open"
578 msg = "Copies context already open"
580 raise error.ProgrammingError(msg)
579 raise error.ProgrammingError(msg)
581 self._copies = {}
580 self._copies = {}
582 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
581 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
583 return self
582 return self
584
583
585 def __call__(self, src):
584 def __call__(self, src):
586 """create a backup of the file at src"""
585 """create a backup of the file at src"""
587 prefix = os.path.basename(src)
586 prefix = os.path.basename(src)
588 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
587 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
589 os.close(fd)
588 os.close(fd)
590 self._copies[src] = dst
589 self._copies[src] = dst
591 util.copyfiles(src, dst, hardlink=True)
590 util.copyfiles(src, dst, hardlink=True)
592 return dst
591 return dst
593
592
594 def __getitem__(self, src):
593 def __getitem__(self, src):
595 """return the path to a valid version of `src`
594 """return the path to a valid version of `src`
596
595
597 If the file has no backup, the path of the file is returned
596 If the file has no backup, the path of the file is returned
598 unmodified."""
597 unmodified."""
599 return self._copies.get(src, src)
598 return self._copies.get(src, src)
600
599
601 def __exit__(self, *args, **kwargs):
600 def __exit__(self, *args, **kwargs):
602 """discard all backups"""
601 """discard all backups"""
603 for tmp in self._copies.values():
602 for tmp in self._copies.values():
604 util.tryunlink(tmp)
603 util.tryunlink(tmp)
605 util.tryrmdir(self._dst_dir)
604 util.tryrmdir(self._dst_dir)
606 self._copies = None
605 self._copies = None
607 self._dst_dir = None
606 self._dst_dir = None
608
607
609
608
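# Editor's note: a short usage sketch for TempCopyManager above.  The paths are
# hypothetical; calling the manager snapshots a volatile file, indexing it
# returns the snapshot path (or the original path when no copy was taken), and
# leaving the context discards every snapshot and the temporary directory.
def _temp_copy_manager_example(volatile_path, stable_path):
    with TempCopyManager() as copies:
        copies(volatile_path)                # back the volatile file up (hardlink if possible)
        read_from = copies[volatile_path]    # -> path of the backup copy
        untouched = copies[stable_path]      # -> same path back, no backup was made
        return read_from, untouched          # backups are deleted once the context exits

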
610 def _makemap(repo):
609 def _makemap(repo):
611 """make a (src -> vfs) map for the repo"""
610 """make a (src -> vfs) map for the repo"""
612 vfsmap = {
611 vfsmap = {
613 _srcstore: repo.svfs,
612 _srcstore: repo.svfs,
614 _srccache: repo.cachevfs,
613 _srccache: repo.cachevfs,
615 }
614 }
616 # we keep repo.vfs out of the map on purpose, there are too many dangers there
615 # we keep repo.vfs out of the map on purpose, there are too many dangers there
617 # (eg: .hg/hgrc)
616 # (eg: .hg/hgrc)
618 assert repo.vfs not in vfsmap.values()
617 assert repo.vfs not in vfsmap.values()
619
618
620 return vfsmap
619 return vfsmap
621
620
622
621
623 def _emit2(repo, entries):
622 def _emit2(repo, entries):
624 """actually emit the stream bundle"""
623 """actually emit the stream bundle"""
625 vfsmap = _makemap(repo)
624 vfsmap = _makemap(repo)
626 # we keep repo.vfs out of the map on purpose, there are too many dangers there
625 # we keep repo.vfs out of the map on purpose, there are too many dangers there
627 # (eg: .hg/hgrc),
626 # (eg: .hg/hgrc),
628 #
627 #
629 # this assert is duplicated (from _makemap) as an author might think this is
628 # this assert is duplicated (from _makemap) as an author might think this is
630 # fine, while it is really not.
629 # fine, while it is really not.
631 if repo.vfs in vfsmap.values():
630 if repo.vfs in vfsmap.values():
632 raise error.ProgrammingError(
631 raise error.ProgrammingError(
633 b'repo.vfs must not be added to vfsmap for security reasons'
632 b'repo.vfs must not be added to vfsmap for security reasons'
634 )
633 )
635
634
636 # translate the vfs one
635 # translate the vfs one
637 entries = [(vfs_key, vfsmap[vfs_key], e) for (vfs_key, e) in entries]
636 entries = [(vfs_key, vfsmap[vfs_key], e) for (vfs_key, e) in entries]
638
637
639 file_count = totalfilesize = 0
638 file_count = totalfilesize = 0
640 # record the expected size of every file
639 # record the expected size of every file
641 for k, vfs, e in entries:
640 for k, vfs, e in entries:
642 for f in e.files():
641 for f in e.files():
643 file_count += 1
642 file_count += 1
644 totalfilesize += f.file_size(vfs)
643 totalfilesize += f.file_size(vfs)
645
644
646 progress = repo.ui.makeprogress(
645 progress = repo.ui.makeprogress(
647 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
646 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
648 )
647 )
649 progress.update(0)
648 progress.update(0)
650 with TempCopyManager() as copy, progress:
649 with TempCopyManager() as copy, progress:
651 # create a copy of volatile files
650 # create a copy of volatile files
652 for k, vfs, e in entries:
651 for k, vfs, e in entries:
653 for f in e.files():
652 for f in e.files():
654 if f.is_volatile:
653 if f.is_volatile:
655 copy(vfs.join(f.unencoded_path))
654 copy(vfs.join(f.unencoded_path))
656 # the first yield releases the lock on the repository
655 # the first yield releases the lock on the repository
657 yield file_count, totalfilesize
656 yield file_count, totalfilesize
658 totalbytecount = 0
657 totalbytecount = 0
659
658
660 for src, vfs, e in entries:
659 for src, vfs, e in entries:
661 for f in e.files():
660 for name, stream, size in e.get_streams(vfs, copies=copy):
662 yield src
661 yield src
663 name = f.unencoded_path
664 yield util.uvarintencode(len(name))
662 yield util.uvarintencode(len(name))
665 actual_path = copy[vfs.join(name)]
666 fp = open(actual_path, b'rb')
667 size = f.file_size(vfs)
668 bytecount = 0
669 try:
670 yield util.uvarintencode(size)
663 yield util.uvarintencode(size)
671 yield name
664 yield name
672 if size <= 65536:
665 bytecount = 0
673 chunks = (fp.read(size),)
666 for chunk in stream:
674 else:
675 chunks = util.filechunkiter(fp, limit=size)
676 for chunk in chunks:
677 bytecount += len(chunk)
667 bytecount += len(chunk)
678 totalbytecount += len(chunk)
668 totalbytecount += len(chunk)
679 progress.update(totalbytecount)
669 progress.update(totalbytecount)
680 yield chunk
670 yield chunk
681 if bytecount != size:
671 if bytecount != size:
682 # Would most likely be caused by a race due to `hg
672 # Would most likely be caused by a race due to `hg
683 # strip` or a revlog split
673 # strip` or a revlog split
684 msg = _(
674 msg = _(
685 b'clone could only read %d bytes from %s, but '
675 b'clone could only read %d bytes from %s, but '
686 b'expected %d bytes'
676 b'expected %d bytes'
687 )
677 )
688 raise error.Abort(msg % (bytecount, name, size))
678 raise error.Abort(msg % (bytecount, name, size))
689 finally:
690 fp.close()
691
679
692
680
693 def _test_sync_point_walk_1(repo):
681 def _test_sync_point_walk_1(repo):
694 """a function for synchronisation during tests"""
682 """a function for synchronisation during tests"""
695
683
696
684
697 def _test_sync_point_walk_2(repo):
685 def _test_sync_point_walk_2(repo):
698 """a function for synchronisation during tests"""
686 """a function for synchronisation during tests"""
699
687
700
688
701 def _entries_walk(repo, includes, excludes, includeobsmarkers):
689 def _entries_walk(repo, includes, excludes, includeobsmarkers):
702 """emit a seris of files information useful to clone a repo
690 """emit a seris of files information useful to clone a repo
703
691
704 return (vfs-key, entry) iterator
692 return (vfs-key, entry) iterator
705
693
706 Where `entry` is a StoreEntry (used even for cache entries).
694 Where `entry` is a StoreEntry (used even for cache entries).
707 """
695 """
708 assert repo._currentlock(repo._lockref) is not None
696 assert repo._currentlock(repo._lockref) is not None
709
697
710 matcher = None
698 matcher = None
711 if includes or excludes:
699 if includes or excludes:
712 matcher = narrowspec.match(repo.root, includes, excludes)
700 matcher = narrowspec.match(repo.root, includes, excludes)
713
701
714 phase = not repo.publishing()
702 phase = not repo.publishing()
715 entries = _walkstreamfiles(
703 entries = _walkstreamfiles(
716 repo,
704 repo,
717 matcher,
705 matcher,
718 phase=phase,
706 phase=phase,
719 obsolescence=includeobsmarkers,
707 obsolescence=includeobsmarkers,
720 )
708 )
721 for entry in entries:
709 for entry in entries:
722 yield (_srcstore, entry)
710 yield (_srcstore, entry)
723
711
724 for name in cacheutil.cachetocopy(repo):
712 for name in cacheutil.cachetocopy(repo):
725 if repo.cachevfs.exists(name):
713 if repo.cachevfs.exists(name):
726 # not really a StoreEntry, but close enough
714 # not really a StoreEntry, but close enough
727 entry = store.SimpleStoreEntry(
715 entry = store.SimpleStoreEntry(
728 entry_path=name,
716 entry_path=name,
729 is_volatile=True,
717 is_volatile=True,
730 )
718 )
731 yield (_srccache, entry)
719 yield (_srccache, entry)
732
720
733
721
734 def generatev2(repo, includes, excludes, includeobsmarkers):
722 def generatev2(repo, includes, excludes, includeobsmarkers):
735 """Emit content for version 2 of a streaming clone.
723 """Emit content for version 2 of a streaming clone.
736
724
737 the data stream consists of the following entries:
725 the data stream consists of the following entries:
738 1) A char representing the file destination (eg: store or cache)
726 1) A char representing the file destination (eg: store or cache)
739 2) A varint containing the length of the filename
727 2) A varint containing the length of the filename
740 3) A varint containing the length of file data
728 3) A varint containing the length of file data
741 4) N bytes containing the filename (the internal, store-agnostic form)
729 4) N bytes containing the filename (the internal, store-agnostic form)
742 5) N bytes containing the file data
730 5) N bytes containing the file data
743
731
744 Returns a 3-tuple of (file count, file size, data iterator).
732 Returns a 3-tuple of (file count, file size, data iterator).
745 """
733 """
746
734
747 with repo.lock():
735 with repo.lock():
748
736
749 repo.ui.debug(b'scanning\n')
737 repo.ui.debug(b'scanning\n')
750
738
751 entries = _entries_walk(
739 entries = _entries_walk(
752 repo,
740 repo,
753 includes=includes,
741 includes=includes,
754 excludes=excludes,
742 excludes=excludes,
755 includeobsmarkers=includeobsmarkers,
743 includeobsmarkers=includeobsmarkers,
756 )
744 )
757
745
758 chunks = _emit2(repo, entries)
746 chunks = _emit2(repo, entries)
759 first = next(chunks)
747 first = next(chunks)
760 file_count, total_file_size = first
748 file_count, total_file_size = first
761 _test_sync_point_walk_1(repo)
749 _test_sync_point_walk_1(repo)
762 _test_sync_point_walk_2(repo)
750 _test_sync_point_walk_2(repo)
763
751
764 return file_count, total_file_size, chunks
752 return file_count, total_file_size, chunks
765
753
766
754
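# Editor's note: a minimal sketch of decoding one entry of the v2 framing
# described in the generatev2() docstring above; consumev2() below is the real
# consumer.  It reuses util.readexactly and util.uvarintdecodestream, the same
# helpers the real code uses.
def _read_one_v2_entry_example(fp):
    src = util.readexactly(fp, 1)           # destination: b's' (store) or b'c' (cache)
    namelen = util.uvarintdecodestream(fp)  # varint: length of the file name
    datalen = util.uvarintdecodestream(fp)  # varint: length of the file data
    name = util.readexactly(fp, namelen)
    data = util.readexactly(fp, datalen)
    return src, name, data

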
767 def generatev3(repo, includes, excludes, includeobsmarkers):
755 def generatev3(repo, includes, excludes, includeobsmarkers):
768 return generatev2(repo, includes, excludes, includeobsmarkers)
756 return generatev2(repo, includes, excludes, includeobsmarkers)
769
757
770
758
771 @contextlib.contextmanager
759 @contextlib.contextmanager
772 def nested(*ctxs):
760 def nested(*ctxs):
773 this = ctxs[0]
761 this = ctxs[0]
774 rest = ctxs[1:]
762 rest = ctxs[1:]
775 with this:
763 with this:
776 if rest:
764 if rest:
777 with nested(*rest):
765 with nested(*rest):
778 yield
766 yield
779 else:
767 else:
780 yield
768 yield
781
769
782
770
783 def consumev2(repo, fp, filecount, filesize):
771 def consumev2(repo, fp, filecount, filesize):
784 """Apply the contents from a version 2 streaming clone.
772 """Apply the contents from a version 2 streaming clone.
785
773
786 Data is read from an object that only needs to provide a ``read(size)``
774 Data is read from an object that only needs to provide a ``read(size)``
787 method.
775 method.
788 """
776 """
789 with repo.lock():
777 with repo.lock():
790 repo.ui.status(
778 repo.ui.status(
791 _(b'%d files to transfer, %s of data\n')
779 _(b'%d files to transfer, %s of data\n')
792 % (filecount, util.bytecount(filesize))
780 % (filecount, util.bytecount(filesize))
793 )
781 )
794
782
795 start = util.timer()
783 start = util.timer()
796 progress = repo.ui.makeprogress(
784 progress = repo.ui.makeprogress(
797 _(b'clone'), total=filesize, unit=_(b'bytes')
785 _(b'clone'), total=filesize, unit=_(b'bytes')
798 )
786 )
799 progress.update(0)
787 progress.update(0)
800
788
801 vfsmap = _makemap(repo)
789 vfsmap = _makemap(repo)
802 # we keep repo.vfs out of the map on purpose, there are too many dangers
790 # we keep repo.vfs out of the map on purpose, there are too many dangers
803 # there (eg: .hg/hgrc),
791 # there (eg: .hg/hgrc),
804 #
792 #
805 # this assert is duplicated (from _makemap) as an author might think this
793 # this assert is duplicated (from _makemap) as an author might think this
806 # is fine, while it is really not.
794 # is fine, while it is really not.
807 if repo.vfs in vfsmap.values():
795 if repo.vfs in vfsmap.values():
808 raise error.ProgrammingError(
796 raise error.ProgrammingError(
809 b'repo.vfs must not be added to vfsmap for security reasons'
797 b'repo.vfs must not be added to vfsmap for security reasons'
810 )
798 )
811
799
812 with repo.transaction(b'clone'):
800 with repo.transaction(b'clone'):
813 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
801 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
814 with nested(*ctxs):
802 with nested(*ctxs):
815 for i in range(filecount):
803 for i in range(filecount):
816 src = util.readexactly(fp, 1)
804 src = util.readexactly(fp, 1)
817 vfs = vfsmap[src]
805 vfs = vfsmap[src]
818 namelen = util.uvarintdecodestream(fp)
806 namelen = util.uvarintdecodestream(fp)
819 datalen = util.uvarintdecodestream(fp)
807 datalen = util.uvarintdecodestream(fp)
820
808
821 name = util.readexactly(fp, namelen)
809 name = util.readexactly(fp, namelen)
822
810
823 if repo.ui.debugflag:
811 if repo.ui.debugflag:
824 repo.ui.debug(
812 repo.ui.debug(
825 b'adding [%s] %s (%s)\n'
813 b'adding [%s] %s (%s)\n'
826 % (src, name, util.bytecount(datalen))
814 % (src, name, util.bytecount(datalen))
827 )
815 )
828
816
829 with vfs(name, b'w') as ofp:
817 with vfs(name, b'w') as ofp:
830 for chunk in util.filechunkiter(fp, limit=datalen):
818 for chunk in util.filechunkiter(fp, limit=datalen):
831 progress.increment(step=len(chunk))
819 progress.increment(step=len(chunk))
832 ofp.write(chunk)
820 ofp.write(chunk)
833
821
834 # force @filecache properties to be reloaded from
822 # force @filecache properties to be reloaded from
835 # streamclone-ed file at next access
823 # streamclone-ed file at next access
836 repo.invalidate(clearfilecache=True)
824 repo.invalidate(clearfilecache=True)
837
825
838 elapsed = util.timer() - start
826 elapsed = util.timer() - start
839 if elapsed <= 0:
827 if elapsed <= 0:
840 elapsed = 0.001
828 elapsed = 0.001
841 repo.ui.status(
829 repo.ui.status(
842 _(b'transferred %s in %.1f seconds (%s/sec)\n')
830 _(b'transferred %s in %.1f seconds (%s/sec)\n')
843 % (
831 % (
844 util.bytecount(progress.pos),
832 util.bytecount(progress.pos),
845 elapsed,
833 elapsed,
846 util.bytecount(progress.pos / elapsed),
834 util.bytecount(progress.pos / elapsed),
847 )
835 )
848 )
836 )
849 progress.complete()
837 progress.complete()
850
838
851
839
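

# Editor's note (not part of streamclone.py): each file in the version 2
# stream is framed as a 1-byte vfs key, a uvarint-encoded name length, a
# uvarint-encoded data length, the file name, then the file data, exactly as
# read by consumev2() above.  The sketch below parses that framing into
# (key, name, data) tuples without touching a repository; like consumev2, it
# assumes `fp` only needs to provide read(size).
def _example_iter_v2_frames(fp, filecount):
    for _i in range(filecount):
        key = util.readexactly(fp, 1)
        namelen = util.uvarintdecodestream(fp)
        datalen = util.uvarintdecodestream(fp)
        name = util.readexactly(fp, namelen)
        data = util.readexactly(fp, datalen)
        yield key, name, data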


def applybundlev2(repo, fp, filecount, filesize, requirements):
    from . import localrepo

    missingreqs = [r for r in requirements if r not in repo.supported]
    if missingreqs:
        raise error.Abort(
            _(b'unable to apply stream clone: unsupported format: %s')
            % b', '.join(sorted(missingreqs))
        )

    consumev2(repo, fp, filecount, filesize)

    repo.requirements = new_stream_clone_requirements(
        repo.requirements,
        requirements,
    )
    repo.svfs.options = localrepo.resolvestorevfsoptions(
        repo.ui, repo.requirements, repo.features
    )
    scmutil.writereporequirements(repo)
    nodemap.post_stream_cleanup(repo)
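

# Editor's example (not part of streamclone.py): the requirement check in
# applybundlev2() simply refuses to apply a stream whose format the local
# client does not understand.  The same pattern in isolation, on toy values:
def _example_check_requirements(remote_requirements, supported):
    missing = [r for r in remote_requirements if r not in supported]
    if missing:
        raise error.Abort(
            _(b'unable to apply stream clone: unsupported format: %s')
            % b', '.join(sorted(missing))
        )
    # e.g. _example_check_requirements({b'revlogv1', b'exotic-feature'},
    # {b'revlogv1'}) aborts and names b'exotic-feature'.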


def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
    hardlink = [True]

    def copy_used():
        hardlink[0] = False
        progress.topic = _(b'copying')

    for k, path in entries:
        src_vfs = src_vfs_map[k]
        dst_vfs = dst_vfs_map[k]
        src_path = src_vfs.join(path)
        dst_path = dst_vfs.join(path)
        # We cannot use dirname and makedirs of dst_vfs here because the store
        # encoding confuses them. See issue 6581 for details.
        dirname = os.path.dirname(dst_path)
        if not os.path.exists(dirname):
            util.makedirs(dirname)
        dst_vfs.register_file(path)
        # XXX we could use the #nb_bytes argument.
        util.copyfile(
            src_path,
            dst_path,
            hardlink=hardlink[0],
            no_hardlink_cb=copy_used,
            check_fs_hardlink=False,
        )
        progress.increment()
    return hardlink[0]
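

# Editor's example (not part of streamclone.py): _copy_files() tries to
# hardlink every file and permanently falls back to plain copies the first
# time hardlinking fails (e.g. across filesystems), using a one-element list
# as a mutable flag shared with the callback.  The same pattern sketched
# without any Mercurial APIs:
def _example_link_or_copy(pairs):
    import os
    import shutil

    use_hardlink = [True]
    for src, dst in pairs:
        if use_hardlink[0]:
            try:
                os.link(src, dst)
                continue
            except OSError:
                # give up on hardlinks for all remaining files
                use_hardlink[0] = False
        shutil.copy2(src, dst)
    return use_hardlink[0]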


def local_copy(src_repo, dest_repo):
    """copy all content from one local repository to another

    This is useful for local clone"""
    src_store_requirements = {
        r
        for r in src_repo.requirements
        if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
    }
    dest_store_requirements = {
        r
        for r in dest_repo.requirements
        if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
    }
    assert src_store_requirements == dest_store_requirements

    with dest_repo.lock():
        with src_repo.lock():

            # Bookmarks are not integrated into the streaming copy because
            # they may live in `repo.vfs`, and too much sensitive data is
            # reachable through `repo.vfs` to expose it to streaming clone.
            src_book_vfs = bookmarks.bookmarksvfs(src_repo)
            srcbookmarks = src_book_vfs.join(b'bookmarks')
            bm_count = 0
            if os.path.exists(srcbookmarks):
                bm_count = 1

            entries = _entries_walk(
                src_repo,
                includes=None,
                excludes=None,
                includeobsmarkers=True,
            )
            entries = list(entries)
            src_vfs_map = _makemap(src_repo)
            dest_vfs_map = _makemap(dest_repo)
            total_files = sum(len(e[1].files()) for e in entries) + bm_count
            progress = src_repo.ui.makeprogress(
                topic=_(b'linking'),
                total=total_files,
                unit=_(b'files'),
            )
            # copy files
            #
            # We could copy the full file while the source repository is locked
            # and the other one without the lock. However, in the linking case,
            # this would also require checks that nobody is appending any data
            # to the files while we do the clone, so this is not done yet. We
            # could do this blindly when copying files.
            files = [
                (vfs_key, f.unencoded_path)
                for vfs_key, e in entries
                for f in e.files()
            ]
            hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)

            # copy bookmarks over
            if bm_count:
                dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
                dstbookmarks = dst_book_vfs.join(b'bookmarks')
                util.copyfile(srcbookmarks, dstbookmarks)
            progress.complete()
            if hardlink:
                msg = b'linked %d files\n'
            else:
                msg = b'copied %d files\n'
            src_repo.ui.debug(msg % total_files)

        with dest_repo.transaction(b"localclone") as tr:
            dest_repo.store.write(tr)

            # clean up transaction files as they do not make sense here
            transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
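

# Editor's note (not part of streamclone.py): local_copy() insists that both
# repositories share the same *store* requirements; requirements that only
# describe the working directory (requirementsmod.WORKING_DIR_REQUIREMENTS)
# are ignored because only store content is copied.  A small sketch of that
# comparison on its own:
def _example_same_store_requirements(src_repo, dest_repo):
    working_dir_only = set(requirementsmod.WORKING_DIR_REQUIREMENTS)
    src_store = set(src_repo.requirements) - working_dir_only
    dest_store = set(dest_repo.requirements) - working_dir_only
    return src_store == dest_store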