store: make `walk` return an entry for obsolescence if requested so...
marmoute
r51407:0925eaf0 default
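
The functional change in this diff is that streamclone._walkstreamfiles (and the store.walk call beneath it) gains an `obsolescence` keyword, so a stream clone can ask the store to also emit an entry for the obsolescence data. Any extension that wraps `_walkstreamfiles` has to accept and forward that keyword, which is what the remotefilelog hunk below does. A minimal sketch of that forwarding pattern follows; the extension module layout is hypothetical, and only the `obsolescence=False` default and the pass-through to `orig` mirror the diff.

    # Sketch of an extension wrapper that stays compatible with the new keyword.
    # The extsetup/module layout is hypothetical; the wrapfunction call mirrors the diff.
    from mercurial import extensions, streamclone


    def _walkstreamfiles(orig, repo, matcher=None, phase=False, obsolescence=False):
        # extension-specific filtering of stream entries would go here
        for entry in orig(repo, matcher, phase=phase, obsolescence=obsolescence):
            yield entry


    def extsetup(ui):
        extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)

Spelling the keyword out, rather than swallowing it in **kwargs, keeps the wrapper's signature aligned with upstream and makes a missing forward obvious at review time.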
@@ -1,442 +1,446 b''
1 # remotefilelogserver.py - server logic for a remotefilelog server
1 # remotefilelogserver.py - server logic for a remotefilelog server
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import os
8 import os
9 import stat
9 import stat
10 import time
10 import time
11 import zlib
11 import zlib
12
12
13 from mercurial.i18n import _
13 from mercurial.i18n import _
14 from mercurial.node import bin, hex
14 from mercurial.node import bin, hex
15 from mercurial.pycompat import open
15 from mercurial.pycompat import open
16 from mercurial import (
16 from mercurial import (
17 changegroup,
17 changegroup,
18 changelog,
18 changelog,
19 context,
19 context,
20 error,
20 error,
21 extensions,
21 extensions,
22 match,
22 match,
23 scmutil,
23 scmutil,
24 store,
24 store,
25 streamclone,
25 streamclone,
26 util,
26 util,
27 wireprotoserver,
27 wireprotoserver,
28 wireprototypes,
28 wireprototypes,
29 wireprotov1server,
29 wireprotov1server,
30 )
30 )
31 from . import (
31 from . import (
32 constants,
32 constants,
33 shallowutil,
33 shallowutil,
34 )
34 )
35
35
36 _sshv1server = wireprotoserver.sshv1protocolhandler
36 _sshv1server = wireprotoserver.sshv1protocolhandler
37
37
38
38
39 def setupserver(ui, repo):
39 def setupserver(ui, repo):
40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
41 onetimesetup(ui)
41 onetimesetup(ui)
42
42
43 # don't send files to shallow clients during pulls
43 # don't send files to shallow clients during pulls
44 def generatefiles(
44 def generatefiles(
45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
46 ):
46 ):
47 caps = self._bundlecaps or []
47 caps = self._bundlecaps or []
48 if constants.BUNDLE2_CAPABLITY in caps:
48 if constants.BUNDLE2_CAPABLITY in caps:
49 # only send files that don't match the specified patterns
49 # only send files that don't match the specified patterns
50 includepattern = None
50 includepattern = None
51 excludepattern = None
51 excludepattern = None
52 for cap in self._bundlecaps or []:
52 for cap in self._bundlecaps or []:
53 if cap.startswith(b"includepattern="):
53 if cap.startswith(b"includepattern="):
54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
55 elif cap.startswith(b"excludepattern="):
55 elif cap.startswith(b"excludepattern="):
56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
57
57
58 m = match.always()
58 m = match.always()
59 if includepattern or excludepattern:
59 if includepattern or excludepattern:
60 m = match.match(
60 m = match.match(
61 repo.root, b'', None, includepattern, excludepattern
61 repo.root, b'', None, includepattern, excludepattern
62 )
62 )
63
63
64 changedfiles = list([f for f in changedfiles if not m(f)])
64 changedfiles = list([f for f in changedfiles if not m(f)])
65 return orig(
65 return orig(
66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
67 )
67 )
68
68
69 extensions.wrapfunction(
69 extensions.wrapfunction(
70 changegroup.cgpacker, b'generatefiles', generatefiles
70 changegroup.cgpacker, b'generatefiles', generatefiles
71 )
71 )
72
72
73
73
74 onetime = False
74 onetime = False
75
75
76
76
77 def onetimesetup(ui):
77 def onetimesetup(ui):
78 """Configures the wireprotocol for both clients and servers."""
78 """Configures the wireprotocol for both clients and servers."""
79 global onetime
79 global onetime
80 if onetime:
80 if onetime:
81 return
81 return
82 onetime = True
82 onetime = True
83
83
84 # support file content requests
84 # support file content requests
85 wireprotov1server.wireprotocommand(
85 wireprotov1server.wireprotocommand(
86 b'x_rfl_getflogheads', b'path', permission=b'pull'
86 b'x_rfl_getflogheads', b'path', permission=b'pull'
87 )(getflogheads)
87 )(getflogheads)
88 wireprotov1server.wireprotocommand(
88 wireprotov1server.wireprotocommand(
89 b'x_rfl_getfiles', b'', permission=b'pull'
89 b'x_rfl_getfiles', b'', permission=b'pull'
90 )(getfiles)
90 )(getfiles)
91 wireprotov1server.wireprotocommand(
91 wireprotov1server.wireprotocommand(
92 b'x_rfl_getfile', b'file node', permission=b'pull'
92 b'x_rfl_getfile', b'file node', permission=b'pull'
93 )(getfile)
93 )(getfile)
94
94
95 class streamstate:
95 class streamstate:
96 match = None
96 match = None
97 shallowremote = False
97 shallowremote = False
98 noflatmf = False
98 noflatmf = False
99
99
100 state = streamstate()
100 state = streamstate()
101
101
102 def stream_out_shallow(repo, proto, other):
102 def stream_out_shallow(repo, proto, other):
103 includepattern = None
103 includepattern = None
104 excludepattern = None
104 excludepattern = None
105 raw = other.get(b'includepattern')
105 raw = other.get(b'includepattern')
106 if raw:
106 if raw:
107 includepattern = raw.split(b'\0')
107 includepattern = raw.split(b'\0')
108 raw = other.get(b'excludepattern')
108 raw = other.get(b'excludepattern')
109 if raw:
109 if raw:
110 excludepattern = raw.split(b'\0')
110 excludepattern = raw.split(b'\0')
111
111
112 oldshallow = state.shallowremote
112 oldshallow = state.shallowremote
113 oldmatch = state.match
113 oldmatch = state.match
114 oldnoflatmf = state.noflatmf
114 oldnoflatmf = state.noflatmf
115 try:
115 try:
116 state.shallowremote = True
116 state.shallowremote = True
117 state.match = match.always()
117 state.match = match.always()
118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
119 if includepattern or excludepattern:
119 if includepattern or excludepattern:
120 state.match = match.match(
120 state.match = match.match(
121 repo.root, b'', None, includepattern, excludepattern
121 repo.root, b'', None, includepattern, excludepattern
122 )
122 )
123 streamres = wireprotov1server.stream(repo, proto)
123 streamres = wireprotov1server.stream(repo, proto)
124
124
125 # Force the first value to execute, so the file list is computed
125 # Force the first value to execute, so the file list is computed
126 # within the try/finally scope
126 # within the try/finally scope
127 first = next(streamres.gen)
127 first = next(streamres.gen)
128 second = next(streamres.gen)
128 second = next(streamres.gen)
129
129
130 def gen():
130 def gen():
131 yield first
131 yield first
132 yield second
132 yield second
133 for value in streamres.gen:
133 for value in streamres.gen:
134 yield value
134 yield value
135
135
136 return wireprototypes.streamres(gen())
136 return wireprototypes.streamres(gen())
137 finally:
137 finally:
138 state.shallowremote = oldshallow
138 state.shallowremote = oldshallow
139 state.match = oldmatch
139 state.match = oldmatch
140 state.noflatmf = oldnoflatmf
140 state.noflatmf = oldnoflatmf
141
141
142 wireprotov1server.commands[b'stream_out_shallow'] = (
142 wireprotov1server.commands[b'stream_out_shallow'] = (
143 stream_out_shallow,
143 stream_out_shallow,
144 b'*',
144 b'*',
145 )
145 )
146
146
147 # don't clone filelogs to shallow clients
147 # don't clone filelogs to shallow clients
148 def _walkstreamfiles(orig, repo, matcher=None, phase=False):
148 def _walkstreamfiles(
149 orig, repo, matcher=None, phase=False, obsolescence=False
150 ):
149 if state.shallowremote:
151 if state.shallowremote:
150 # if we are shallow ourselves, stream our local commits
152 # if we are shallow ourselves, stream our local commits
151 if shallowutil.isenabled(repo):
153 if shallowutil.isenabled(repo):
152 striplen = len(repo.store.path) + 1
154 striplen = len(repo.store.path) + 1
153 readdir = repo.store.rawvfs.readdir
155 readdir = repo.store.rawvfs.readdir
154 visit = [os.path.join(repo.store.path, b'data')]
156 visit = [os.path.join(repo.store.path, b'data')]
155 while visit:
157 while visit:
156 p = visit.pop()
158 p = visit.pop()
157 for f, kind, st in readdir(p, stat=True):
159 for f, kind, st in readdir(p, stat=True):
158 fp = p + b'/' + f
160 fp = p + b'/' + f
159 if kind == stat.S_IFREG:
161 if kind == stat.S_IFREG:
160 if not fp.endswith(b'.i') and not fp.endswith(
162 if not fp.endswith(b'.i') and not fp.endswith(
161 b'.d'
163 b'.d'
162 ):
164 ):
163 n = util.pconvert(fp[striplen:])
165 n = util.pconvert(fp[striplen:])
164 d = store.decodedir(n)
166 d = store.decodedir(n)
165 yield store.SimpleStoreEntry(
167 yield store.SimpleStoreEntry(
166 entry_path=d,
168 entry_path=d,
167 is_volatile=False,
169 is_volatile=False,
168 file_size=st.st_size,
170 file_size=st.st_size,
169 )
171 )
170
172
171 if kind == stat.S_IFDIR:
173 if kind == stat.S_IFDIR:
172 visit.append(fp)
174 visit.append(fp)
173
175
174 if scmutil.istreemanifest(repo):
176 if scmutil.istreemanifest(repo):
175 for entry in repo.store.data_entries():
177 for entry in repo.store.data_entries():
176 if not entry.is_revlog:
178 if not entry.is_revlog:
177 continue
179 continue
178 if entry.is_manifestlog:
180 if entry.is_manifestlog:
179 yield entry
181 yield entry
180
182
181 # Return .d and .i files that do not match the shallow pattern
183 # Return .d and .i files that do not match the shallow pattern
182 match = state.match
184 match = state.match
183 if match and not match.always():
185 if match and not match.always():
184 for entry in repo.store.data_entries():
186 for entry in repo.store.data_entries():
185 if not entry.is_revlog:
187 if not entry.is_revlog:
186 continue
188 continue
187 if not state.match(entry.target_id):
189 if not state.match(entry.target_id):
188 yield entry
190 yield entry
189
191
190 for x in repo.store.top_entries():
192 for x in repo.store.top_entries():
191 if state.noflatmf and x[1][:11] == b'00manifest.':
193 if state.noflatmf and x[1][:11] == b'00manifest.':
192 continue
194 continue
193 yield x
195 yield x
194
196
195 elif shallowutil.isenabled(repo):
197 elif shallowutil.isenabled(repo):
196 # don't allow cloning from a shallow repo to a full repo
198 # don't allow cloning from a shallow repo to a full repo
197 # since it would require fetching every version of every
199 # since it would require fetching every version of every
198 # file in order to create the revlogs.
200 # file in order to create the revlogs.
199 raise error.Abort(
201 raise error.Abort(
200 _(b"Cannot clone from a shallow repo to a full repo.")
202 _(b"Cannot clone from a shallow repo to a full repo.")
201 )
203 )
202 else:
204 else:
203 for x in orig(repo, matcher, phase=phase):
205 for x in orig(
206 repo, matcher, phase=phase, obsolescence=obsolescence
207 ):
204 yield x
208 yield x
205
209
206 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
210 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
207
211
208 # expose remotefilelog capabilities
212 # expose remotefilelog capabilities
209 def _capabilities(orig, repo, proto):
213 def _capabilities(orig, repo, proto):
210 caps = orig(repo, proto)
214 caps = orig(repo, proto)
211 if shallowutil.isenabled(repo) or ui.configbool(
215 if shallowutil.isenabled(repo) or ui.configbool(
212 b'remotefilelog', b'server'
216 b'remotefilelog', b'server'
213 ):
217 ):
214 if isinstance(proto, _sshv1server):
218 if isinstance(proto, _sshv1server):
215 # legacy getfiles method which only works over ssh
219 # legacy getfiles method which only works over ssh
216 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
220 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
217 caps.append(b'x_rfl_getflogheads')
221 caps.append(b'x_rfl_getflogheads')
218 caps.append(b'x_rfl_getfile')
222 caps.append(b'x_rfl_getfile')
219 return caps
223 return caps
220
224
221 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
225 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
222
226
223 def _adjustlinkrev(orig, self, *args, **kwargs):
227 def _adjustlinkrev(orig, self, *args, **kwargs):
224 # When generating file blobs, taking the real path is too slow on large
228 # When generating file blobs, taking the real path is too slow on large
225 # repos, so force it to just return the linkrev directly.
229 # repos, so force it to just return the linkrev directly.
226 repo = self._repo
230 repo = self._repo
227 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
231 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
228 return self._filelog.linkrev(self._filelog.rev(self._filenode))
232 return self._filelog.linkrev(self._filelog.rev(self._filenode))
229 return orig(self, *args, **kwargs)
233 return orig(self, *args, **kwargs)
230
234
231 extensions.wrapfunction(
235 extensions.wrapfunction(
232 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
236 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
233 )
237 )
234
238
235 def _iscmd(orig, cmd):
239 def _iscmd(orig, cmd):
236 if cmd == b'x_rfl_getfiles':
240 if cmd == b'x_rfl_getfiles':
237 return False
241 return False
238 return orig(cmd)
242 return orig(cmd)
239
243
240 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
244 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
241
245
242
246
243 def _loadfileblob(repo, cachepath, path, node):
247 def _loadfileblob(repo, cachepath, path, node):
244 filecachepath = os.path.join(cachepath, path, hex(node))
248 filecachepath = os.path.join(cachepath, path, hex(node))
245 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
249 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
246 filectx = repo.filectx(path, fileid=node)
250 filectx = repo.filectx(path, fileid=node)
247 if filectx.node() == repo.nullid:
251 if filectx.node() == repo.nullid:
248 repo.changelog = changelog.changelog(repo.svfs)
252 repo.changelog = changelog.changelog(repo.svfs)
249 filectx = repo.filectx(path, fileid=node)
253 filectx = repo.filectx(path, fileid=node)
250
254
251 text = createfileblob(filectx)
255 text = createfileblob(filectx)
252 # TODO configurable compression engines
256 # TODO configurable compression engines
253 text = zlib.compress(text)
257 text = zlib.compress(text)
254
258
255 # everything should be user & group read/writable
259 # everything should be user & group read/writable
256 oldumask = os.umask(0o002)
260 oldumask = os.umask(0o002)
257 try:
261 try:
258 dirname = os.path.dirname(filecachepath)
262 dirname = os.path.dirname(filecachepath)
259 if not os.path.exists(dirname):
263 if not os.path.exists(dirname):
260 try:
264 try:
261 os.makedirs(dirname)
265 os.makedirs(dirname)
262 except FileExistsError:
266 except FileExistsError:
263 pass
267 pass
264
268
265 f = None
269 f = None
266 try:
270 try:
267 f = util.atomictempfile(filecachepath, b"wb")
271 f = util.atomictempfile(filecachepath, b"wb")
268 f.write(text)
272 f.write(text)
269 except (IOError, OSError):
273 except (IOError, OSError):
270 # Don't abort if the user only has permission to read,
274 # Don't abort if the user only has permission to read,
271 # and not write.
275 # and not write.
272 pass
276 pass
273 finally:
277 finally:
274 if f:
278 if f:
275 f.close()
279 f.close()
276 finally:
280 finally:
277 os.umask(oldumask)
281 os.umask(oldumask)
278 else:
282 else:
279 with open(filecachepath, b"rb") as f:
283 with open(filecachepath, b"rb") as f:
280 text = f.read()
284 text = f.read()
281 return text
285 return text
282
286
283
287
284 def getflogheads(repo, proto, path):
288 def getflogheads(repo, proto, path):
285 """A server api for requesting a filelog's heads"""
289 """A server api for requesting a filelog's heads"""
286 flog = repo.file(path)
290 flog = repo.file(path)
287 heads = flog.heads()
291 heads = flog.heads()
288 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
292 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
289
293
290
294
291 def getfile(repo, proto, file, node):
295 def getfile(repo, proto, file, node):
292 """A server api for requesting a particular version of a file. Can be used
296 """A server api for requesting a particular version of a file. Can be used
293 in batches to request many files at once. The return protocol is:
297 in batches to request many files at once. The return protocol is:
294 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
298 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
295 non-zero for an error.
299 non-zero for an error.
296
300
297 data is a compressed blob with revlog flag and ancestors information. See
301 data is a compressed blob with revlog flag and ancestors information. See
298 createfileblob for its content.
302 createfileblob for its content.
299 """
303 """
300 if shallowutil.isenabled(repo):
304 if shallowutil.isenabled(repo):
301 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
305 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
302 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
306 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
303 if not cachepath:
307 if not cachepath:
304 cachepath = os.path.join(repo.path, b"remotefilelogcache")
308 cachepath = os.path.join(repo.path, b"remotefilelogcache")
305 node = bin(node.strip())
309 node = bin(node.strip())
306 if node == repo.nullid:
310 if node == repo.nullid:
307 return b'0\0'
311 return b'0\0'
308 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
312 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
309
313
310
314
311 def getfiles(repo, proto):
315 def getfiles(repo, proto):
312 """A server api for requesting particular versions of particular files."""
316 """A server api for requesting particular versions of particular files."""
313 if shallowutil.isenabled(repo):
317 if shallowutil.isenabled(repo):
314 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
318 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
315 if not isinstance(proto, _sshv1server):
319 if not isinstance(proto, _sshv1server):
316 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
320 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
317
321
318 def streamer():
322 def streamer():
319 fin = proto._fin
323 fin = proto._fin
320
324
321 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
325 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
322 if not cachepath:
326 if not cachepath:
323 cachepath = os.path.join(repo.path, b"remotefilelogcache")
327 cachepath = os.path.join(repo.path, b"remotefilelogcache")
324
328
325 while True:
329 while True:
326 request = fin.readline()[:-1]
330 request = fin.readline()[:-1]
327 if not request:
331 if not request:
328 break
332 break
329
333
330 node = bin(request[:40])
334 node = bin(request[:40])
331 if node == repo.nullid:
335 if node == repo.nullid:
332 yield b'0\n'
336 yield b'0\n'
333 continue
337 continue
334
338
335 path = request[40:]
339 path = request[40:]
336
340
337 text = _loadfileblob(repo, cachepath, path, node)
341 text = _loadfileblob(repo, cachepath, path, node)
338
342
339 yield b'%d\n%s' % (len(text), text)
343 yield b'%d\n%s' % (len(text), text)
340
344
341 # it would be better to only flush after processing a whole batch
345 # it would be better to only flush after processing a whole batch
342 # but currently we don't know if there are more requests coming
346 # but currently we don't know if there are more requests coming
343 proto._fout.flush()
347 proto._fout.flush()
344
348
345 return wireprototypes.streamres(streamer())
349 return wireprototypes.streamres(streamer())
346
350
347
351
348 def createfileblob(filectx):
352 def createfileblob(filectx):
349 """
353 """
350 format:
354 format:
351 v0:
355 v0:
352 str(len(rawtext)) + '\0' + rawtext + ancestortext
356 str(len(rawtext)) + '\0' + rawtext + ancestortext
353 v1:
357 v1:
354 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
358 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
355 metalist := metalist + '\n' + meta | meta
359 metalist := metalist + '\n' + meta | meta
356 meta := sizemeta | flagmeta
360 meta := sizemeta | flagmeta
357 sizemeta := METAKEYSIZE + str(len(rawtext))
361 sizemeta := METAKEYSIZE + str(len(rawtext))
358 flagmeta := METAKEYFLAG + str(flag)
362 flagmeta := METAKEYFLAG + str(flag)
359
363
360 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
364 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
361 length of 1.
365 length of 1.
362 """
366 """
363 flog = filectx.filelog()
367 flog = filectx.filelog()
364 frev = filectx.filerev()
368 frev = filectx.filerev()
365 revlogflags = flog._revlog.flags(frev)
369 revlogflags = flog._revlog.flags(frev)
366 if revlogflags == 0:
370 if revlogflags == 0:
367 # normal files
371 # normal files
368 text = filectx.data()
372 text = filectx.data()
369 else:
373 else:
370 # lfs, read raw revision data
374 # lfs, read raw revision data
371 text = flog.rawdata(frev)
375 text = flog.rawdata(frev)
372
376
373 repo = filectx._repo
377 repo = filectx._repo
374
378
375 ancestors = [filectx]
379 ancestors = [filectx]
376
380
377 try:
381 try:
378 repo.forcelinkrev = True
382 repo.forcelinkrev = True
379 ancestors.extend([f for f in filectx.ancestors()])
383 ancestors.extend([f for f in filectx.ancestors()])
380
384
381 ancestortext = b""
385 ancestortext = b""
382 for ancestorctx in ancestors:
386 for ancestorctx in ancestors:
383 parents = ancestorctx.parents()
387 parents = ancestorctx.parents()
384 p1 = repo.nullid
388 p1 = repo.nullid
385 p2 = repo.nullid
389 p2 = repo.nullid
386 if len(parents) > 0:
390 if len(parents) > 0:
387 p1 = parents[0].filenode()
391 p1 = parents[0].filenode()
388 if len(parents) > 1:
392 if len(parents) > 1:
389 p2 = parents[1].filenode()
393 p2 = parents[1].filenode()
390
394
391 copyname = b""
395 copyname = b""
392 rename = ancestorctx.renamed()
396 rename = ancestorctx.renamed()
393 if rename:
397 if rename:
394 copyname = rename[0]
398 copyname = rename[0]
395 linknode = ancestorctx.node()
399 linknode = ancestorctx.node()
396 ancestortext += b"%s%s%s%s%s\0" % (
400 ancestortext += b"%s%s%s%s%s\0" % (
397 ancestorctx.filenode(),
401 ancestorctx.filenode(),
398 p1,
402 p1,
399 p2,
403 p2,
400 linknode,
404 linknode,
401 copyname,
405 copyname,
402 )
406 )
403 finally:
407 finally:
404 repo.forcelinkrev = False
408 repo.forcelinkrev = False
405
409
406 header = shallowutil.buildfileblobheader(len(text), revlogflags)
410 header = shallowutil.buildfileblobheader(len(text), revlogflags)
407
411
408 return b"%s\0%s%s" % (header, text, ancestortext)
412 return b"%s\0%s%s" % (header, text, ancestortext)
409
413
410
414
411 def gcserver(ui, repo):
415 def gcserver(ui, repo):
412 if not repo.ui.configbool(b"remotefilelog", b"server"):
416 if not repo.ui.configbool(b"remotefilelog", b"server"):
413 return
417 return
414
418
415 neededfiles = set()
419 neededfiles = set()
416 heads = repo.revs(b"heads(tip~25000:) - null")
420 heads = repo.revs(b"heads(tip~25000:) - null")
417
421
418 cachepath = repo.vfs.join(b"remotefilelogcache")
422 cachepath = repo.vfs.join(b"remotefilelogcache")
419 for head in heads:
423 for head in heads:
420 mf = repo[head].manifest()
424 mf = repo[head].manifest()
421 for filename, filenode in mf.items():
425 for filename, filenode in mf.items():
422 filecachepath = os.path.join(cachepath, filename, hex(filenode))
426 filecachepath = os.path.join(cachepath, filename, hex(filenode))
423 neededfiles.add(filecachepath)
427 neededfiles.add(filecachepath)
424
428
425 # delete unneeded older files
429 # delete unneeded older files
426 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
430 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
427 expiration = time.time() - (days * 24 * 60 * 60)
431 expiration = time.time() - (days * 24 * 60 * 60)
428
432
429 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
433 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
430 progress.update(0)
434 progress.update(0)
431 for root, dirs, files in os.walk(cachepath):
435 for root, dirs, files in os.walk(cachepath):
432 for file in files:
436 for file in files:
433 filepath = os.path.join(root, file)
437 filepath = os.path.join(root, file)
434 progress.increment()
438 progress.increment()
435 if filepath in neededfiles:
439 if filepath in neededfiles:
436 continue
440 continue
437
441
438 stat = os.stat(filepath)
442 stat = os.stat(filepath)
439 if stat.st_mtime < expiration:
443 if stat.st_mtime < expiration:
440 os.remove(filepath)
444 os.remove(filepath)
441
445
442 progress.complete()
446 progress.complete()
@@ -1,1089 +1,1098 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how much bytes should be read from fncache in one read
31 # how much bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _match_tracked_entry(entry, matcher):
36 def _match_tracked_entry(entry, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 if entry.is_filelog:
44 if entry.is_filelog:
45 return matcher(entry.target_id)
45 return matcher(entry.target_id)
46 elif entry.is_manifestlog:
46 elif entry.is_manifestlog:
47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49
49
50
50
51 # This avoids a collision between a file named foo and a dir named
51 # This avoids a collision between a file named foo and a dir named
52 # foo.i or foo.d
52 # foo.i or foo.d
53 def _encodedir(path):
53 def _encodedir(path):
54 """
54 """
55 >>> _encodedir(b'data/foo.i')
55 >>> _encodedir(b'data/foo.i')
56 'data/foo.i'
56 'data/foo.i'
57 >>> _encodedir(b'data/foo.i/bla.i')
57 >>> _encodedir(b'data/foo.i/bla.i')
58 'data/foo.i.hg/bla.i'
58 'data/foo.i.hg/bla.i'
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 'data/foo.i.hg.hg/bla.i'
60 'data/foo.i.hg.hg/bla.i'
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 """
63 """
64 return (
64 return (
65 path.replace(b".hg/", b".hg.hg/")
65 path.replace(b".hg/", b".hg.hg/")
66 .replace(b".i/", b".i.hg/")
66 .replace(b".i/", b".i.hg/")
67 .replace(b".d/", b".d.hg/")
67 .replace(b".d/", b".d.hg/")
68 )
68 )
69
69
70
70
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72
72
73
73
74 def decodedir(path):
74 def decodedir(path):
75 """
75 """
76 >>> decodedir(b'data/foo.i')
76 >>> decodedir(b'data/foo.i')
77 'data/foo.i'
77 'data/foo.i'
78 >>> decodedir(b'data/foo.i.hg/bla.i')
78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 'data/foo.i/bla.i'
79 'data/foo.i/bla.i'
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 'data/foo.i.hg/bla.i'
81 'data/foo.i.hg/bla.i'
82 """
82 """
83 if b".hg/" not in path:
83 if b".hg/" not in path:
84 return path
84 return path
85 return (
85 return (
86 path.replace(b".d.hg/", b".d/")
86 path.replace(b".d.hg/", b".d/")
87 .replace(b".i.hg/", b".i/")
87 .replace(b".i.hg/", b".i/")
88 .replace(b".hg.hg/", b".hg/")
88 .replace(b".hg.hg/", b".hg/")
89 )
89 )
90
90
91
91
92 def _reserved():
92 def _reserved():
93 """characters that are problematic for filesystems
93 """characters that are problematic for filesystems
94
94
95 * ascii escapes (0..31)
95 * ascii escapes (0..31)
96 * ascii hi (126..255)
96 * ascii hi (126..255)
97 * windows specials
97 * windows specials
98
98
99 these characters will be escaped by encodefunctions
99 these characters will be escaped by encodefunctions
100 """
100 """
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 for x in range(32):
102 for x in range(32):
103 yield x
103 yield x
104 for x in range(126, 256):
104 for x in range(126, 256):
105 yield x
105 yield x
106 for x in winreserved:
106 for x in winreserved:
107 yield x
107 yield x
108
108
109
109
110 def _buildencodefun():
110 def _buildencodefun():
111 """
111 """
112 >>> enc, dec = _buildencodefun()
112 >>> enc, dec = _buildencodefun()
113
113
114 >>> enc(b'nothing/special.txt')
114 >>> enc(b'nothing/special.txt')
115 'nothing/special.txt'
115 'nothing/special.txt'
116 >>> dec(b'nothing/special.txt')
116 >>> dec(b'nothing/special.txt')
117 'nothing/special.txt'
117 'nothing/special.txt'
118
118
119 >>> enc(b'HELLO')
119 >>> enc(b'HELLO')
120 '_h_e_l_l_o'
120 '_h_e_l_l_o'
121 >>> dec(b'_h_e_l_l_o')
121 >>> dec(b'_h_e_l_l_o')
122 'HELLO'
122 'HELLO'
123
123
124 >>> enc(b'hello:world?')
124 >>> enc(b'hello:world?')
125 'hello~3aworld~3f'
125 'hello~3aworld~3f'
126 >>> dec(b'hello~3aworld~3f')
126 >>> dec(b'hello~3aworld~3f')
127 'hello:world?'
127 'hello:world?'
128
128
129 >>> enc(b'the\\x07quick\\xADshot')
129 >>> enc(b'the\\x07quick\\xADshot')
130 'the~07quick~adshot'
130 'the~07quick~adshot'
131 >>> dec(b'the~07quick~adshot')
131 >>> dec(b'the~07quick~adshot')
132 'the\\x07quick\\xadshot'
132 'the\\x07quick\\xadshot'
133 """
133 """
134 e = b'_'
134 e = b'_'
135 xchr = pycompat.bytechr
135 xchr = pycompat.bytechr
136 asciistr = list(map(xchr, range(127)))
136 asciistr = list(map(xchr, range(127)))
137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
137 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
138
138
139 cmap = {x: x for x in asciistr}
139 cmap = {x: x for x in asciistr}
140 for x in _reserved():
140 for x in _reserved():
141 cmap[xchr(x)] = b"~%02x" % x
141 cmap[xchr(x)] = b"~%02x" % x
142 for x in capitals + [ord(e)]:
142 for x in capitals + [ord(e)]:
143 cmap[xchr(x)] = e + xchr(x).lower()
143 cmap[xchr(x)] = e + xchr(x).lower()
144
144
145 dmap = {}
145 dmap = {}
146 for k, v in cmap.items():
146 for k, v in cmap.items():
147 dmap[v] = k
147 dmap[v] = k
148
148
149 def decode(s):
149 def decode(s):
150 i = 0
150 i = 0
151 while i < len(s):
151 while i < len(s):
152 for l in range(1, 4):
152 for l in range(1, 4):
153 try:
153 try:
154 yield dmap[s[i : i + l]]
154 yield dmap[s[i : i + l]]
155 i += l
155 i += l
156 break
156 break
157 except KeyError:
157 except KeyError:
158 pass
158 pass
159 else:
159 else:
160 raise KeyError
160 raise KeyError
161
161
162 return (
162 return (
163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
163 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
164 lambda s: b''.join(list(decode(s))),
164 lambda s: b''.join(list(decode(s))),
165 )
165 )
166
166
167
167
168 _encodefname, _decodefname = _buildencodefun()
168 _encodefname, _decodefname = _buildencodefun()
169
169
170
170
171 def encodefilename(s):
171 def encodefilename(s):
172 """
172 """
173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
173 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
174 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
175 """
175 """
176 return _encodefname(encodedir(s))
176 return _encodefname(encodedir(s))
177
177
178
178
179 def decodefilename(s):
179 def decodefilename(s):
180 """
180 """
181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
181 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
182 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
183 """
183 """
184 return decodedir(_decodefname(s))
184 return decodedir(_decodefname(s))
185
185
186
186
187 def _buildlowerencodefun():
187 def _buildlowerencodefun():
188 """
188 """
189 >>> f = _buildlowerencodefun()
189 >>> f = _buildlowerencodefun()
190 >>> f(b'nothing/special.txt')
190 >>> f(b'nothing/special.txt')
191 'nothing/special.txt'
191 'nothing/special.txt'
192 >>> f(b'HELLO')
192 >>> f(b'HELLO')
193 'hello'
193 'hello'
194 >>> f(b'hello:world?')
194 >>> f(b'hello:world?')
195 'hello~3aworld~3f'
195 'hello~3aworld~3f'
196 >>> f(b'the\\x07quick\\xADshot')
196 >>> f(b'the\\x07quick\\xADshot')
197 'the~07quick~adshot'
197 'the~07quick~adshot'
198 """
198 """
199 xchr = pycompat.bytechr
199 xchr = pycompat.bytechr
200 cmap = {xchr(x): xchr(x) for x in range(127)}
200 cmap = {xchr(x): xchr(x) for x in range(127)}
201 for x in _reserved():
201 for x in _reserved():
202 cmap[xchr(x)] = b"~%02x" % x
202 cmap[xchr(x)] = b"~%02x" % x
203 for x in range(ord(b"A"), ord(b"Z") + 1):
203 for x in range(ord(b"A"), ord(b"Z") + 1):
204 cmap[xchr(x)] = xchr(x).lower()
204 cmap[xchr(x)] = xchr(x).lower()
205
205
206 def lowerencode(s):
206 def lowerencode(s):
207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
207 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
208
208
209 return lowerencode
209 return lowerencode
210
210
211
211
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213
213
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217
217
218
218
219 def _auxencode(path, dotencode):
219 def _auxencode(path, dotencode):
220 """
220 """
221 Encodes filenames containing names reserved by Windows or which end in
221 Encodes filenames containing names reserved by Windows or which end in
222 period or space. Does not touch other single reserved characters c.
222 period or space. Does not touch other single reserved characters c.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
223 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
224 Additionally encodes space or period at the beginning, if dotencode is
224 Additionally encodes space or period at the beginning, if dotencode is
225 True. Parameter path is assumed to be all lowercase.
225 True. Parameter path is assumed to be all lowercase.
226 A segment only needs encoding if a reserved name appears as a
226 A segment only needs encoding if a reserved name appears as a
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
227 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
228 doesn't need encoding.
228 doesn't need encoding.
229
229
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
230 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
231 >>> _auxencode(s.split(b'/'), True)
231 >>> _auxencode(s.split(b'/'), True)
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
232 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
233 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
234 >>> _auxencode(s.split(b'/'), False)
234 >>> _auxencode(s.split(b'/'), False)
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
235 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
236 >>> _auxencode([b'foo. '], True)
236 >>> _auxencode([b'foo. '], True)
237 ['foo.~20']
237 ['foo.~20']
238 >>> _auxencode([b' .foo'], True)
238 >>> _auxencode([b' .foo'], True)
239 ['~20.foo']
239 ['~20.foo']
240 """
240 """
241 for i, n in enumerate(path):
241 for i, n in enumerate(path):
242 if not n:
242 if not n:
243 continue
243 continue
244 if dotencode and n[0] in b'. ':
244 if dotencode and n[0] in b'. ':
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
245 n = b"~%02x" % ord(n[0:1]) + n[1:]
246 path[i] = n
246 path[i] = n
247 else:
247 else:
248 l = n.find(b'.')
248 l = n.find(b'.')
249 if l == -1:
249 if l == -1:
250 l = len(n)
250 l = len(n)
251 if (l == 3 and n[:3] in _winres3) or (
251 if (l == 3 and n[:3] in _winres3) or (
252 l == 4
252 l == 4
253 and n[3:4] <= b'9'
253 and n[3:4] <= b'9'
254 and n[3:4] >= b'1'
254 and n[3:4] >= b'1'
255 and n[:3] in _winres4
255 and n[:3] in _winres4
256 ):
256 ):
257 # encode third letter ('aux' -> 'au~78')
257 # encode third letter ('aux' -> 'au~78')
258 ec = b"~%02x" % ord(n[2:3])
258 ec = b"~%02x" % ord(n[2:3])
259 n = n[0:2] + ec + n[3:]
259 n = n[0:2] + ec + n[3:]
260 path[i] = n
260 path[i] = n
261 if n[-1] in b'. ':
261 if n[-1] in b'. ':
262 # encode last period or space ('foo...' -> 'foo..~2e')
262 # encode last period or space ('foo...' -> 'foo..~2e')
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
263 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
264 return path
264 return path
265
265
266
266
267 _maxstorepathlen = 120
267 _maxstorepathlen = 120
268 _dirprefixlen = 8
268 _dirprefixlen = 8
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270
270
271
271
272 def _hashencode(path, dotencode):
272 def _hashencode(path, dotencode):
273 digest = hex(hashutil.sha1(path).digest())
273 digest = hex(hashutil.sha1(path).digest())
274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
274 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
275 parts = _auxencode(le, dotencode)
275 parts = _auxencode(le, dotencode)
276 basename = parts[-1]
276 basename = parts[-1]
277 _root, ext = os.path.splitext(basename)
277 _root, ext = os.path.splitext(basename)
278 sdirs = []
278 sdirs = []
279 sdirslen = 0
279 sdirslen = 0
280 for p in parts[:-1]:
280 for p in parts[:-1]:
281 d = p[:_dirprefixlen]
281 d = p[:_dirprefixlen]
282 if d[-1] in b'. ':
282 if d[-1] in b'. ':
283 # Windows can't access dirs ending in period or space
283 # Windows can't access dirs ending in period or space
284 d = d[:-1] + b'_'
284 d = d[:-1] + b'_'
285 if sdirslen == 0:
285 if sdirslen == 0:
286 t = len(d)
286 t = len(d)
287 else:
287 else:
288 t = sdirslen + 1 + len(d)
288 t = sdirslen + 1 + len(d)
289 if t > _maxshortdirslen:
289 if t > _maxshortdirslen:
290 break
290 break
291 sdirs.append(d)
291 sdirs.append(d)
292 sdirslen = t
292 sdirslen = t
293 dirs = b'/'.join(sdirs)
293 dirs = b'/'.join(sdirs)
294 if len(dirs) > 0:
294 if len(dirs) > 0:
295 dirs += b'/'
295 dirs += b'/'
296 res = b'dh/' + dirs + digest + ext
296 res = b'dh/' + dirs + digest + ext
297 spaceleft = _maxstorepathlen - len(res)
297 spaceleft = _maxstorepathlen - len(res)
298 if spaceleft > 0:
298 if spaceleft > 0:
299 filler = basename[:spaceleft]
299 filler = basename[:spaceleft]
300 res = b'dh/' + dirs + filler + digest + ext
300 res = b'dh/' + dirs + filler + digest + ext
301 return res
301 return res
302
302
303
303
304 def _hybridencode(path, dotencode):
304 def _hybridencode(path, dotencode):
305 """encodes path with a length limit
305 """encodes path with a length limit
306
306
307 Encodes all paths that begin with 'data/', according to the following.
307 Encodes all paths that begin with 'data/', according to the following.
308
308
309 Default encoding (reversible):
309 Default encoding (reversible):
310
310
311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
311 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
312 characters are encoded as '~xx', where xx is the two digit hex code
312 characters are encoded as '~xx', where xx is the two digit hex code
313 of the character (see encodefilename).
313 of the character (see encodefilename).
314 Relevant path components consisting of Windows reserved filenames are
314 Relevant path components consisting of Windows reserved filenames are
315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
315 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
316
316
317 Hashed encoding (not reversible):
317 Hashed encoding (not reversible):
318
318
319 If the default-encoded path is longer than _maxstorepathlen, a
319 If the default-encoded path is longer than _maxstorepathlen, a
320 non-reversible hybrid hashing of the path is done instead.
320 non-reversible hybrid hashing of the path is done instead.
321 This encoding uses up to _dirprefixlen characters of all directory
321 This encoding uses up to _dirprefixlen characters of all directory
322 levels of the lowerencoded path, but not more levels than can fit into
322 levels of the lowerencoded path, but not more levels than can fit into
323 _maxshortdirslen.
323 _maxshortdirslen.
324 Then follows the filler followed by the sha digest of the full path.
324 Then follows the filler followed by the sha digest of the full path.
325 The filler is the beginning of the basename of the lowerencoded path
325 The filler is the beginning of the basename of the lowerencoded path
326 (the basename is everything after the last path separator). The filler
326 (the basename is everything after the last path separator). The filler
327 is as long as possible, filling in characters from the basename until
327 is as long as possible, filling in characters from the basename until
328 the encoded path has _maxstorepathlen characters (or all chars of the
328 the encoded path has _maxstorepathlen characters (or all chars of the
329 basename have been taken).
329 basename have been taken).
330 The extension (e.g. '.i' or '.d') is preserved.
330 The extension (e.g. '.i' or '.d') is preserved.
331
331
332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
332 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
333 encoding was used.
333 encoding was used.
334 """
334 """
335 path = encodedir(path)
335 path = encodedir(path)
336 ef = _encodefname(path).split(b'/')
336 ef = _encodefname(path).split(b'/')
337 res = b'/'.join(_auxencode(ef, dotencode))
337 res = b'/'.join(_auxencode(ef, dotencode))
338 if len(res) > _maxstorepathlen:
338 if len(res) > _maxstorepathlen:
339 res = _hashencode(path, dotencode)
339 res = _hashencode(path, dotencode)
340 return res
340 return res
341
341
342
342
343 def _pathencode(path):
343 def _pathencode(path):
344 de = encodedir(path)
344 de = encodedir(path)
345 if len(path) > _maxstorepathlen:
345 if len(path) > _maxstorepathlen:
346 return _hashencode(de, True)
346 return _hashencode(de, True)
347 ef = _encodefname(de).split(b'/')
347 ef = _encodefname(de).split(b'/')
348 res = b'/'.join(_auxencode(ef, True))
348 res = b'/'.join(_auxencode(ef, True))
349 if len(res) > _maxstorepathlen:
349 if len(res) > _maxstorepathlen:
350 return _hashencode(de, True)
350 return _hashencode(de, True)
351 return res
351 return res
352
352
353
353
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355
355
356
356
357 def _plainhybridencode(f):
357 def _plainhybridencode(f):
358 return _hybridencode(f, False)
358 return _hybridencode(f, False)
359
359
360
360
361 def _calcmode(vfs):
361 def _calcmode(vfs):
362 try:
362 try:
363 # files in .hg/ will be created using this mode
363 # files in .hg/ will be created using this mode
364 mode = vfs.stat().st_mode
364 mode = vfs.stat().st_mode
365 # avoid some useless chmods
365 # avoid some useless chmods
366 if (0o777 & ~util.umask) == (0o777 & mode):
366 if (0o777 & ~util.umask) == (0o777 & mode):
367 mode = None
367 mode = None
368 except OSError:
368 except OSError:
369 mode = None
369 mode = None
370 return mode
370 return mode
371
371
372
372
373 _data = [
373 _data = [
374 b'bookmarks',
374 b'bookmarks',
375 b'narrowspec',
375 b'narrowspec',
376 b'data',
376 b'data',
377 b'meta',
377 b'meta',
378 b'00manifest.d',
378 b'00manifest.d',
379 b'00manifest.i',
379 b'00manifest.i',
380 b'00changelog.d',
380 b'00changelog.d',
381 b'00changelog.i',
381 b'00changelog.i',
382 b'phaseroots',
382 b'phaseroots',
383 b'obsstore',
383 b'obsstore',
384 b'requires',
384 b'requires',
385 ]
385 ]
386
386
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 REVLOG_FILES_OTHER_EXT = (
388 REVLOG_FILES_OTHER_EXT = (
389 b'.idx',
389 b'.idx',
390 b'.d',
390 b'.d',
391 b'.dat',
391 b'.dat',
392 b'.n',
392 b'.n',
393 b'.nd',
393 b'.nd',
394 b'.sda',
394 b'.sda',
395 )
395 )
396 # file extension that also use a `-SOMELONGIDHASH.ext` form
396 # file extension that also use a `-SOMELONGIDHASH.ext` form
397 REVLOG_FILES_LONG_EXT = (
397 REVLOG_FILES_LONG_EXT = (
398 b'.nd',
398 b'.nd',
399 b'.idx',
399 b'.idx',
400 b'.dat',
400 b'.dat',
401 b'.sda',
401 b'.sda',
402 )
402 )
403 # files that are "volatile" and might change between listing and streaming
403 # files that are "volatile" and might change between listing and streaming
404 #
404 #
405 # note: the ".nd" file are nodemap data and won't "change" but they might be
405 # note: the ".nd" file are nodemap data and won't "change" but they might be
406 # deleted.
406 # deleted.
407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408
408
409 # some exception to the above matching
409 # some exception to the above matching
410 #
410 #
411 # XXX This is currently not in use because of issue6542
411 # XXX This is currently not in use because of issue6542
412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413
413
414
414
415 def is_revlog(f, kind, st):
415 def is_revlog(f, kind, st):
416 if kind != stat.S_IFREG:
416 if kind != stat.S_IFREG:
417 return None
417 return None
418 return revlog_type(f)
418 return revlog_type(f)
419
419
420
420
421 def revlog_type(f):
421 def revlog_type(f):
422 # XXX we need to filter `undo.` created by the transaction here, however
422 # XXX we need to filter `undo.` created by the transaction here, however
423 # being naive about it also filter revlog for `undo.*` files, leading to
423 # being naive about it also filter revlog for `undo.*` files, leading to
424 # issue6542. So we no longer use EXCLUDED.
424 # issue6542. So we no longer use EXCLUDED.
425 if f.endswith(REVLOG_FILES_MAIN_EXT):
425 if f.endswith(REVLOG_FILES_MAIN_EXT):
426 return FILEFLAGS_REVLOG_MAIN
426 return FILEFLAGS_REVLOG_MAIN
427 elif f.endswith(REVLOG_FILES_OTHER_EXT):
427 elif f.endswith(REVLOG_FILES_OTHER_EXT):
428 t = FILETYPE_FILELOG_OTHER
428 t = FILETYPE_FILELOG_OTHER
429 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
429 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
430 t |= FILEFLAGS_VOLATILE
430 t |= FILEFLAGS_VOLATILE
431 return t
431 return t
432 return None
432 return None
433
433
434
434
435 # the file is part of changelog data
435 # the file is part of changelog data
436 FILEFLAGS_CHANGELOG = 1 << 13
436 FILEFLAGS_CHANGELOG = 1 << 13
437 # the file is part of manifest data
437 # the file is part of manifest data
438 FILEFLAGS_MANIFESTLOG = 1 << 12
438 FILEFLAGS_MANIFESTLOG = 1 << 12
439 # the file is part of filelog data
439 # the file is part of filelog data
440 FILEFLAGS_FILELOG = 1 << 11
440 FILEFLAGS_FILELOG = 1 << 11
441 # file that are not directly part of a revlog
441 # file that are not directly part of a revlog
442 FILEFLAGS_OTHER = 1 << 10
442 FILEFLAGS_OTHER = 1 << 10
443
443
444 # the main entry point for a revlog
444 # the main entry point for a revlog
445 FILEFLAGS_REVLOG_MAIN = 1 << 1
445 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 # a secondary file for a revlog
446 # a secondary file for a revlog
447 FILEFLAGS_REVLOG_OTHER = 1 << 0
447 FILEFLAGS_REVLOG_OTHER = 1 << 0
448
448
449 # files that are "volatile" and might change between listing and streaming
449 # files that are "volatile" and might change between listing and streaming
450 FILEFLAGS_VOLATILE = 1 << 20
450 FILEFLAGS_VOLATILE = 1 << 20
451
451
452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 FILETYPE_OTHER = FILEFLAGS_OTHER
458 FILETYPE_OTHER = FILEFLAGS_OTHER
459
459
460
460
461 @attr.s(slots=True, init=False)
461 @attr.s(slots=True, init=False)
462 class BaseStoreEntry:
462 class BaseStoreEntry:
463 """An entry in the store
463 """An entry in the store
464
464
465 This is returned by `store.walk` and represent some data in the store."""
465 This is returned by `store.walk` and represent some data in the store."""
466
466
467
467
468 @attr.s(slots=True, init=False)
468 @attr.s(slots=True, init=False)
469 class SimpleStoreEntry(BaseStoreEntry):
469 class SimpleStoreEntry(BaseStoreEntry):
470 """A generic entry in the store"""
470 """A generic entry in the store"""
471
471
472 is_revlog = False
472 is_revlog = False
473
473
474 _entry_path = attr.ib()
474 _entry_path = attr.ib()
475 _is_volatile = attr.ib(default=False)
475 _is_volatile = attr.ib(default=False)
476 _file_size = attr.ib(default=None)
476 _file_size = attr.ib(default=None)
477
477
478 def __init__(
478 def __init__(
479 self,
479 self,
480 entry_path,
480 entry_path,
481 is_volatile=False,
481 is_volatile=False,
482 file_size=None,
482 file_size=None,
483 ):
483 ):
484 super().__init__()
484 super().__init__()
485 self._entry_path = entry_path
485 self._entry_path = entry_path
486 self._is_volatile = is_volatile
486 self._is_volatile = is_volatile
487 self._file_size = file_size
487 self._file_size = file_size
488
488
489 def files(self):
489 def files(self):
490 return [
490 return [
491 StoreFile(
491 StoreFile(
492 unencoded_path=self._entry_path,
492 unencoded_path=self._entry_path,
493 file_size=self._file_size,
493 file_size=self._file_size,
494 is_volatile=self._is_volatile,
494 is_volatile=self._is_volatile,
495 )
495 )
496 ]
496 ]
497
497
498
498
499 @attr.s(slots=True, init=False)
499 @attr.s(slots=True, init=False)
500 class RevlogStoreEntry(BaseStoreEntry):
500 class RevlogStoreEntry(BaseStoreEntry):
501 """A revlog entry in the store"""
501 """A revlog entry in the store"""
502
502
503 is_revlog = True
503 is_revlog = True
504
504
505 revlog_type = attr.ib(default=None)
505 revlog_type = attr.ib(default=None)
506 target_id = attr.ib(default=None)
506 target_id = attr.ib(default=None)
507 _path_prefix = attr.ib(default=None)
507 _path_prefix = attr.ib(default=None)
508 _details = attr.ib(default=None)
508 _details = attr.ib(default=None)
509
509
510 def __init__(
510 def __init__(
511 self,
511 self,
512 revlog_type,
512 revlog_type,
513 path_prefix,
513 path_prefix,
514 target_id,
514 target_id,
515 details,
515 details,
516 ):
516 ):
517 super().__init__()
517 super().__init__()
518 self.revlog_type = revlog_type
518 self.revlog_type = revlog_type
519 self.target_id = target_id
519 self.target_id = target_id
520 self._path_prefix = path_prefix
520 self._path_prefix = path_prefix
521 assert b'.i' in details, (path_prefix, details)
521 assert b'.i' in details, (path_prefix, details)
522 self._details = details
522 self._details = details
523
523
524 @property
524 @property
525 def is_changelog(self):
525 def is_changelog(self):
526 return self.revlog_type & FILEFLAGS_CHANGELOG
526 return self.revlog_type & FILEFLAGS_CHANGELOG
527
527
528 @property
528 @property
529 def is_manifestlog(self):
529 def is_manifestlog(self):
530 return self.revlog_type & FILEFLAGS_MANIFESTLOG
530 return self.revlog_type & FILEFLAGS_MANIFESTLOG
531
531
532 @property
532 @property
533 def is_filelog(self):
533 def is_filelog(self):
534 return self.revlog_type & FILEFLAGS_FILELOG
534 return self.revlog_type & FILEFLAGS_FILELOG
535
535
536 def main_file_path(self):
536 def main_file_path(self):
537 """unencoded path of the main revlog file"""
537 """unencoded path of the main revlog file"""
538 return self._path_prefix + b'.i'
538 return self._path_prefix + b'.i'
539
539
540 def files(self):
540 def files(self):
541 files = []
541 files = []
542 for ext in sorted(self._details, key=_ext_key):
542 for ext in sorted(self._details, key=_ext_key):
543 path = self._path_prefix + ext
543 path = self._path_prefix + ext
544 data = self._details[ext]
544 data = self._details[ext]
545 files.append(StoreFile(unencoded_path=path, **data))
545 files.append(StoreFile(unencoded_path=path, **data))
546 return files
546 return files
547
547
548
548
549 @attr.s(slots=True)
549 @attr.s(slots=True)
550 class StoreFile:
550 class StoreFile:
551 """a file matching an entry"""
551 """a file matching an entry"""
552
552
553 unencoded_path = attr.ib()
553 unencoded_path = attr.ib()
554 _file_size = attr.ib(default=None)
554 _file_size = attr.ib(default=None)
555 is_volatile = attr.ib(default=False)
555 is_volatile = attr.ib(default=False)
556
556
557 def file_size(self, vfs):
557 def file_size(self, vfs):
558 if self._file_size is not None:
558 if self._file_size is not None:
559 return self._file_size
559 return self._file_size
560 try:
560 try:
561 return vfs.stat(self.unencoded_path).st_size
561 return vfs.stat(self.unencoded_path).st_size
562 except FileNotFoundError:
562 except FileNotFoundError:
563 return 0
563 return 0
564
564
565
565
566 def _gather_revlog(files_data):
566 def _gather_revlog(files_data):
567 """group files per revlog prefix
567 """group files per revlog prefix
568
568
569 The returns a two level nested dict. The top level key is the revlog prefix
569 The returns a two level nested dict. The top level key is the revlog prefix
570 without extension, the second level is all the file "suffix" that were
570 without extension, the second level is all the file "suffix" that were
571 seen for this revlog and arbitrary file data as value.
571 seen for this revlog and arbitrary file data as value.
572 """
572 """
573 revlogs = collections.defaultdict(dict)
573 revlogs = collections.defaultdict(dict)
574 for u, value in files_data:
574 for u, value in files_data:
575 name, ext = _split_revlog_ext(u)
575 name, ext = _split_revlog_ext(u)
576 revlogs[name][ext] = value
576 revlogs[name][ext] = value
577 return sorted(revlogs.items())
577 return sorted(revlogs.items())
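# Editor's illustration (hypothetical input values, not part of this change):
# given flattened (unencoded path, data) pairs, _gather_revlog groups them
# under their shared revlog prefix, e.g.
#
#     _gather_revlog([
#         (b'data/foo.txt.i', (0, 64)),
#         (b'data/foo.txt.d', (0, 4096)),
#     ])
#     # -> [(b'data/foo.txt', {b'.i': (0, 64), b'.d': (0, 4096)})]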
578
578
579
579
580 def _split_revlog_ext(filename):
580 def _split_revlog_ext(filename):
581 """split the revlog file prefix from the variable extension"""
581 """split the revlog file prefix from the variable extension"""
582 if filename.endswith(REVLOG_FILES_LONG_EXT):
582 if filename.endswith(REVLOG_FILES_LONG_EXT):
583 char = b'-'
583 char = b'-'
584 else:
584 else:
585 char = b'.'
585 char = b'.'
586 idx = filename.rfind(char)
586 idx = filename.rfind(char)
587 return filename[:idx], filename[idx:]
587 return filename[:idx], filename[idx:]
588
588
589
589
590 def _ext_key(ext):
590 def _ext_key(ext):
591 """a key to order revlog suffix
591 """a key to order revlog suffix
592
592
593 important to issue .i after other entries."""
593 important to issue .i after other entries."""
594 # the only important part of this order is to keep the `.i` last.
594 # the only important part of this order is to keep the `.i` last.
595 if ext.endswith(b'.n'):
595 if ext.endswith(b'.n'):
596 return (0, ext)
596 return (0, ext)
597 elif ext.endswith(b'.nd'):
597 elif ext.endswith(b'.nd'):
598 return (10, ext)
598 return (10, ext)
599 elif ext.endswith(b'.d'):
599 elif ext.endswith(b'.d'):
600 return (20, ext)
600 return (20, ext)
601 elif ext.endswith(b'.i'):
601 elif ext.endswith(b'.i'):
602 return (50, ext)
602 return (50, ext)
603 else:
603 else:
604 return (40, ext)
604 return (40, ext)
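# Editor's illustration of the resulting order (assumed suffix set): sorting
# a few common revlog suffixes with this key keeps the `.i` index file last:
#
#     sorted([b'.i', b'.d', b'.nd', b'.n'], key=_ext_key)
#     # -> [b'.n', b'.nd', b'.d', b'.i']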
605
605
606
606
607 class basicstore:
607 class basicstore:
608 '''base class for local repository stores'''
608 '''base class for local repository stores'''
609
609
610 def __init__(self, path, vfstype):
610 def __init__(self, path, vfstype):
611 vfs = vfstype(path)
611 vfs = vfstype(path)
612 self.path = vfs.base
612 self.path = vfs.base
613 self.createmode = _calcmode(vfs)
613 self.createmode = _calcmode(vfs)
614 vfs.createmode = self.createmode
614 vfs.createmode = self.createmode
615 self.rawvfs = vfs
615 self.rawvfs = vfs
616 self.vfs = vfsmod.filtervfs(vfs, encodedir)
616 self.vfs = vfsmod.filtervfs(vfs, encodedir)
617 self.opener = self.vfs
617 self.opener = self.vfs
618
618
619 def join(self, f):
619 def join(self, f):
620 return self.path + b'/' + encodedir(f)
620 return self.path + b'/' + encodedir(f)
621
621
622 def _walk(self, relpath, recurse, undecodable=None):
622 def _walk(self, relpath, recurse, undecodable=None):
623 '''yields (revlog_type, unencoded, size)'''
623 '''yields (revlog_type, unencoded, size)'''
624 path = self.path
624 path = self.path
625 if relpath:
625 if relpath:
626 path += b'/' + relpath
626 path += b'/' + relpath
627 striplen = len(self.path) + 1
627 striplen = len(self.path) + 1
628 l = []
628 l = []
629 if self.rawvfs.isdir(path):
629 if self.rawvfs.isdir(path):
630 visit = [path]
630 visit = [path]
631 readdir = self.rawvfs.readdir
631 readdir = self.rawvfs.readdir
632 while visit:
632 while visit:
633 p = visit.pop()
633 p = visit.pop()
634 for f, kind, st in readdir(p, stat=True):
634 for f, kind, st in readdir(p, stat=True):
635 fp = p + b'/' + f
635 fp = p + b'/' + f
636 rl_type = is_revlog(f, kind, st)
636 rl_type = is_revlog(f, kind, st)
637 if rl_type is not None:
637 if rl_type is not None:
638 n = util.pconvert(fp[striplen:])
638 n = util.pconvert(fp[striplen:])
639 l.append((decodedir(n), (rl_type, st.st_size)))
639 l.append((decodedir(n), (rl_type, st.st_size)))
640 elif kind == stat.S_IFDIR and recurse:
640 elif kind == stat.S_IFDIR and recurse:
641 visit.append(fp)
641 visit.append(fp)
642
642
643 l.sort()
643 l.sort()
644 return l
644 return l
645
645
646 def changelog(self, trypending, concurrencychecker=None):
646 def changelog(self, trypending, concurrencychecker=None):
647 return changelog.changelog(
647 return changelog.changelog(
648 self.vfs,
648 self.vfs,
649 trypending=trypending,
649 trypending=trypending,
650 concurrencychecker=concurrencychecker,
650 concurrencychecker=concurrencychecker,
651 )
651 )
652
652
653 def manifestlog(self, repo, storenarrowmatch):
653 def manifestlog(self, repo, storenarrowmatch):
654 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
654 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
655 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
655 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
656
656
657 def data_entries(
657 def data_entries(
658 self, matcher=None, undecodable=None
658 self, matcher=None, undecodable=None
659 ) -> Generator[BaseStoreEntry, None, None]:
659 ) -> Generator[BaseStoreEntry, None, None]:
660 """Like walk, but excluding the changelog and root manifest.
660 """Like walk, but excluding the changelog and root manifest.
661
661
662 When [undecodable] is None, revlog names that can't be
662 When [undecodable] is None, revlog names that can't be
663 decoded cause an exception. When it is provided, it should
663 decoded cause an exception. When it is provided, it should
664 be a list and the filenames that can't be decoded are added
664 be a list and the filenames that can't be decoded are added
665 to it instead. This is very rarely needed."""
665 to it instead. This is very rarely needed."""
666 dirs = [
666 dirs = [
667 (b'data', FILEFLAGS_FILELOG),
667 (b'data', FILEFLAGS_FILELOG),
668 (b'meta', FILEFLAGS_MANIFESTLOG),
668 (b'meta', FILEFLAGS_MANIFESTLOG),
669 ]
669 ]
670 for base_dir, rl_type in dirs:
670 for base_dir, rl_type in dirs:
671 files = self._walk(base_dir, True, undecodable=undecodable)
671 files = self._walk(base_dir, True, undecodable=undecodable)
672 files = (f for f in files if f[1][0] is not None)
672 files = (f for f in files if f[1][0] is not None)
673 for revlog, details in _gather_revlog(files):
673 for revlog, details in _gather_revlog(files):
674 file_details = {}
674 file_details = {}
675 revlog_target_id = revlog.split(b'/', 1)[1]
675 revlog_target_id = revlog.split(b'/', 1)[1]
676 for ext, (t, s) in sorted(details.items()):
676 for ext, (t, s) in sorted(details.items()):
677 file_details[ext] = {
677 file_details[ext] = {
678 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
678 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
679 'file_size': s,
679 'file_size': s,
680 }
680 }
681 yield RevlogStoreEntry(
681 yield RevlogStoreEntry(
682 path_prefix=revlog,
682 path_prefix=revlog,
683 revlog_type=rl_type,
683 revlog_type=rl_type,
684 target_id=revlog_target_id,
684 target_id=revlog_target_id,
685 details=file_details,
685 details=file_details,
686 )
686 )
687
687
688 def top_entries(self, phase=False) -> Generator[BaseStoreEntry, None, None]:
688 def top_entries(
689 self, phase=False, obsolescence=False
690 ) -> Generator[BaseStoreEntry, None, None]:
689 if phase and self.vfs.exists(b'phaseroots'):
691 if phase and self.vfs.exists(b'phaseroots'):
690 yield SimpleStoreEntry(
692 yield SimpleStoreEntry(
691 entry_path=b'phaseroots',
693 entry_path=b'phaseroots',
692 is_volatile=True,
694 is_volatile=True,
693 )
695 )
694
696
697 if obsolescence and self.vfs.exists(b'obsstore'):
698 # XXX if we had the file size it could be non-volatile
699 yield SimpleStoreEntry(
700 entry_path=b'obsstore',
701 is_volatile=True,
702 )
703
695 files = reversed(self._walk(b'', False))
704 files = reversed(self._walk(b'', False))
696
705
697 changelogs = collections.defaultdict(dict)
706 changelogs = collections.defaultdict(dict)
698 manifestlogs = collections.defaultdict(dict)
707 manifestlogs = collections.defaultdict(dict)
699
708
700 for u, (t, s) in files:
709 for u, (t, s) in files:
701 if u.startswith(b'00changelog'):
710 if u.startswith(b'00changelog'):
702 name, ext = _split_revlog_ext(u)
711 name, ext = _split_revlog_ext(u)
703 changelogs[name][ext] = (t, s)
712 changelogs[name][ext] = (t, s)
704 elif u.startswith(b'00manifest'):
713 elif u.startswith(b'00manifest'):
705 name, ext = _split_revlog_ext(u)
714 name, ext = _split_revlog_ext(u)
706 manifestlogs[name][ext] = (t, s)
715 manifestlogs[name][ext] = (t, s)
707 else:
716 else:
708 yield SimpleStoreEntry(
717 yield SimpleStoreEntry(
709 entry_path=u,
718 entry_path=u,
710 is_volatile=bool(t & FILEFLAGS_VOLATILE),
719 is_volatile=bool(t & FILEFLAGS_VOLATILE),
711 file_size=s,
720 file_size=s,
712 )
721 )
713 # yield manifest before changelog
722 # yield manifest before changelog
714 top_rl = [
723 top_rl = [
715 (manifestlogs, FILEFLAGS_MANIFESTLOG),
724 (manifestlogs, FILEFLAGS_MANIFESTLOG),
716 (changelogs, FILEFLAGS_CHANGELOG),
725 (changelogs, FILEFLAGS_CHANGELOG),
717 ]
726 ]
718 assert len(manifestlogs) <= 1
727 assert len(manifestlogs) <= 1
719 assert len(changelogs) <= 1
728 assert len(changelogs) <= 1
720 for data, revlog_type in top_rl:
729 for data, revlog_type in top_rl:
721 for revlog, details in sorted(data.items()):
730 for revlog, details in sorted(data.items()):
722 file_details = {}
731 file_details = {}
723 for ext, (t, s) in details.items():
732 for ext, (t, s) in details.items():
724 file_details[ext] = {
733 file_details[ext] = {
725 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
734 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
726 'file_size': s,
735 'file_size': s,
727 }
736 }
728 yield RevlogStoreEntry(
737 yield RevlogStoreEntry(
729 path_prefix=revlog,
738 path_prefix=revlog,
730 revlog_type=revlog_type,
739 revlog_type=revlog_type,
731 target_id=b'',
740 target_id=b'',
732 details=file_details,
741 details=file_details,
733 )
742 )
734
743
735 def walk(
744 def walk(
736 self, matcher=None, phase=False
745 self, matcher=None, phase=False, obsolescence=False
737 ) -> Generator[BaseStoreEntry, None, None]:
746 ) -> Generator[BaseStoreEntry, None, None]:
738 """return files related to data storage (ie: revlogs)
747 """return files related to data storage (ie: revlogs)
739
748
740 yields instances of BaseStoreEntry subclasses
749 yields instances of BaseStoreEntry subclasses
741
750
742 if a matcher is passed, only the storage files of tracked paths that
751 if a matcher is passed, only the storage files of tracked paths that
743 match the matcher are yielded
752 match the matcher are yielded
744 """
753 """
745 # yield data files first
754 # yield data files first
746 for x in self.data_entries(matcher):
755 for x in self.data_entries(matcher):
747 yield x
756 yield x
748 for x in self.top_entries(phase=phase):
757 for x in self.top_entries(phase=phase, obsolescence=obsolescence):
749 yield x
758 yield x
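# Editor's usage sketch (hypothetical caller, not part of this change): with
# the new `obsolescence` flag a caller can ask for the obsstore entry to be
# included in the walk, e.g.
#
#     total = 0
#     for entry in repo.store.walk(phase=True, obsolescence=True):
#         for f in entry.files():
#             total += f.file_size(repo.store.vfs)
#
# `repo` is an assumed local repository object used only for illustration.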
750
759
751 def copylist(self):
760 def copylist(self):
752 return _data
761 return _data
753
762
754 def write(self, tr):
763 def write(self, tr):
755 pass
764 pass
756
765
757 def invalidatecaches(self):
766 def invalidatecaches(self):
758 pass
767 pass
759
768
760 def markremoved(self, fn):
769 def markremoved(self, fn):
761 pass
770 pass
762
771
763 def __contains__(self, path):
772 def __contains__(self, path):
764 '''Checks if the store contains path'''
773 '''Checks if the store contains path'''
765 path = b"/".join((b"data", path))
774 path = b"/".join((b"data", path))
766 # file?
775 # file?
767 if self.vfs.exists(path + b".i"):
776 if self.vfs.exists(path + b".i"):
768 return True
777 return True
769 # dir?
778 # dir?
770 if not path.endswith(b"/"):
779 if not path.endswith(b"/"):
771 path = path + b"/"
780 path = path + b"/"
772 return self.vfs.exists(path)
781 return self.vfs.exists(path)
773
782
774
783
775 class encodedstore(basicstore):
784 class encodedstore(basicstore):
776 def __init__(self, path, vfstype):
785 def __init__(self, path, vfstype):
777 vfs = vfstype(path + b'/store')
786 vfs = vfstype(path + b'/store')
778 self.path = vfs.base
787 self.path = vfs.base
779 self.createmode = _calcmode(vfs)
788 self.createmode = _calcmode(vfs)
780 vfs.createmode = self.createmode
789 vfs.createmode = self.createmode
781 self.rawvfs = vfs
790 self.rawvfs = vfs
782 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
791 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
783 self.opener = self.vfs
792 self.opener = self.vfs
784
793
785 def _walk(self, relpath, recurse, undecodable=None):
794 def _walk(self, relpath, recurse, undecodable=None):
786 old = super()._walk(relpath, recurse)
795 old = super()._walk(relpath, recurse)
787 new = []
796 new = []
788 for f1, value in old:
797 for f1, value in old:
789 try:
798 try:
790 f2 = decodefilename(f1)
799 f2 = decodefilename(f1)
791 except KeyError:
800 except KeyError:
792 if undecodable is None:
801 if undecodable is None:
793 msg = _(b'undecodable revlog name %s') % f1
802 msg = _(b'undecodable revlog name %s') % f1
794 raise error.StorageError(msg)
803 raise error.StorageError(msg)
795 else:
804 else:
796 undecodable.append(f1)
805 undecodable.append(f1)
797 continue
806 continue
798 new.append((f2, value))
807 new.append((f2, value))
799 return new
808 return new
800
809
801 def data_entries(
810 def data_entries(
802 self, matcher=None, undecodable=None
811 self, matcher=None, undecodable=None
803 ) -> Generator[BaseStoreEntry, None, None]:
812 ) -> Generator[BaseStoreEntry, None, None]:
804 entries = super(encodedstore, self).data_entries(
813 entries = super(encodedstore, self).data_entries(
805 undecodable=undecodable
814 undecodable=undecodable
806 )
815 )
807 for entry in entries:
816 for entry in entries:
808 if _match_tracked_entry(entry, matcher):
817 if _match_tracked_entry(entry, matcher):
809 yield entry
818 yield entry
810
819
811 def join(self, f):
820 def join(self, f):
812 return self.path + b'/' + encodefilename(f)
821 return self.path + b'/' + encodefilename(f)
813
822
814 def copylist(self):
823 def copylist(self):
815 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
824 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
816
825
817
826
818 class fncache:
827 class fncache:
819 # the filename used to be partially encoded
828 # the filename used to be partially encoded
820 # hence the encodedir/decodedir dance
829 # hence the encodedir/decodedir dance
821 def __init__(self, vfs):
830 def __init__(self, vfs):
822 self.vfs = vfs
831 self.vfs = vfs
823 self._ignores = set()
832 self._ignores = set()
824 self.entries = None
833 self.entries = None
825 self._dirty = False
834 self._dirty = False
826 # set of new additions to fncache
835 # set of new additions to fncache
827 self.addls = set()
836 self.addls = set()
828
837
829 def ensureloaded(self, warn=None):
838 def ensureloaded(self, warn=None):
830 """read the fncache file if not already read.
839 """read the fncache file if not already read.
831
840
832 If the file on disk is corrupted, raise. If warn is provided,
841 If the file on disk is corrupted, raise. If warn is provided,
833 warn and keep going instead."""
842 warn and keep going instead."""
834 if self.entries is None:
843 if self.entries is None:
835 self._load(warn)
844 self._load(warn)
836
845
837 def _load(self, warn=None):
846 def _load(self, warn=None):
838 '''fill the entries from the fncache file'''
847 '''fill the entries from the fncache file'''
839 self._dirty = False
848 self._dirty = False
840 try:
849 try:
841 fp = self.vfs(b'fncache', mode=b'rb')
850 fp = self.vfs(b'fncache', mode=b'rb')
842 except IOError:
851 except IOError:
843 # skip nonexistent file
852 # skip nonexistent file
844 self.entries = set()
853 self.entries = set()
845 return
854 return
846
855
847 self.entries = set()
856 self.entries = set()
848 chunk = b''
857 chunk = b''
849 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
858 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
850 chunk += c
859 chunk += c
851 try:
860 try:
852 p = chunk.rindex(b'\n')
861 p = chunk.rindex(b'\n')
853 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
862 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
854 chunk = chunk[p + 1 :]
863 chunk = chunk[p + 1 :]
855 except ValueError:
864 except ValueError:
856 # substring '\n' not found, maybe the entry is bigger than the
865 # substring '\n' not found, maybe the entry is bigger than the
857 # chunksize, so let's keep iterating
866 # chunksize, so let's keep iterating
858 pass
867 pass
859
868
860 if chunk:
869 if chunk:
861 msg = _(b"fncache does not ends with a newline")
870 msg = _(b"fncache does not ends with a newline")
862 if warn:
871 if warn:
863 warn(msg + b'\n')
872 warn(msg + b'\n')
864 else:
873 else:
865 raise error.Abort(
874 raise error.Abort(
866 msg,
875 msg,
867 hint=_(
876 hint=_(
868 b"use 'hg debugrebuildfncache' to "
877 b"use 'hg debugrebuildfncache' to "
869 b"rebuild the fncache"
878 b"rebuild the fncache"
870 ),
879 ),
871 )
880 )
872 self._checkentries(fp, warn)
881 self._checkentries(fp, warn)
873 fp.close()
882 fp.close()
874
883
875 def _checkentries(self, fp, warn):
884 def _checkentries(self, fp, warn):
876 """make sure there is no empty string in entries"""
885 """make sure there is no empty string in entries"""
877 if b'' in self.entries:
886 if b'' in self.entries:
878 fp.seek(0)
887 fp.seek(0)
879 for n, line in enumerate(fp):
888 for n, line in enumerate(fp):
880 if not line.rstrip(b'\n'):
889 if not line.rstrip(b'\n'):
881 t = _(b'invalid entry in fncache, line %d') % (n + 1)
890 t = _(b'invalid entry in fncache, line %d') % (n + 1)
882 if warn:
891 if warn:
883 warn(t + b'\n')
892 warn(t + b'\n')
884 else:
893 else:
885 raise error.Abort(t)
894 raise error.Abort(t)
886
895
887 def write(self, tr):
896 def write(self, tr):
888 if self._dirty:
897 if self._dirty:
889 assert self.entries is not None
898 assert self.entries is not None
890 self.entries = self.entries | self.addls
899 self.entries = self.entries | self.addls
891 self.addls = set()
900 self.addls = set()
892 tr.addbackup(b'fncache')
901 tr.addbackup(b'fncache')
893 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
902 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
894 if self.entries:
903 if self.entries:
895 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
904 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
896 fp.close()
905 fp.close()
897 self._dirty = False
906 self._dirty = False
898 if self.addls:
907 if self.addls:
899 # if we have just new entries, let's append them to the fncache
908 # if we have just new entries, let's append them to the fncache
900 tr.addbackup(b'fncache')
909 tr.addbackup(b'fncache')
901 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
910 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
902 if self.addls:
911 if self.addls:
903 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
912 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
904 fp.close()
913 fp.close()
905 self.entries = None
914 self.entries = None
906 self.addls = set()
915 self.addls = set()
907
916
908 def addignore(self, fn):
917 def addignore(self, fn):
909 self._ignores.add(fn)
918 self._ignores.add(fn)
910
919
911 def add(self, fn):
920 def add(self, fn):
912 if fn in self._ignores:
921 if fn in self._ignores:
913 return
922 return
914 if self.entries is None:
923 if self.entries is None:
915 self._load()
924 self._load()
916 if fn not in self.entries:
925 if fn not in self.entries:
917 self.addls.add(fn)
926 self.addls.add(fn)
918
927
919 def remove(self, fn):
928 def remove(self, fn):
920 if self.entries is None:
929 if self.entries is None:
921 self._load()
930 self._load()
922 if fn in self.addls:
931 if fn in self.addls:
923 self.addls.remove(fn)
932 self.addls.remove(fn)
924 return
933 return
925 try:
934 try:
926 self.entries.remove(fn)
935 self.entries.remove(fn)
927 self._dirty = True
936 self._dirty = True
928 except KeyError:
937 except KeyError:
929 pass
938 pass
930
939
931 def __contains__(self, fn):
940 def __contains__(self, fn):
932 if fn in self.addls:
941 if fn in self.addls:
933 return True
942 return True
934 if self.entries is None:
943 if self.entries is None:
935 self._load()
944 self._load()
936 return fn in self.entries
945 return fn in self.entries
937
946
938 def __iter__(self):
947 def __iter__(self):
939 if self.entries is None:
948 if self.entries is None:
940 self._load()
949 self._load()
941 return iter(self.entries | self.addls)
950 return iter(self.entries | self.addls)
942
951
943
952
944 class _fncachevfs(vfsmod.proxyvfs):
953 class _fncachevfs(vfsmod.proxyvfs):
945 def __init__(self, vfs, fnc, encode):
954 def __init__(self, vfs, fnc, encode):
946 vfsmod.proxyvfs.__init__(self, vfs)
955 vfsmod.proxyvfs.__init__(self, vfs)
947 self.fncache = fnc
956 self.fncache = fnc
948 self.encode = encode
957 self.encode = encode
949
958
950 def __call__(self, path, mode=b'r', *args, **kw):
959 def __call__(self, path, mode=b'r', *args, **kw):
951 encoded = self.encode(path)
960 encoded = self.encode(path)
952 if (
961 if (
953 mode not in (b'r', b'rb')
962 mode not in (b'r', b'rb')
954 and (path.startswith(b'data/') or path.startswith(b'meta/'))
963 and (path.startswith(b'data/') or path.startswith(b'meta/'))
955 and revlog_type(path) is not None
964 and revlog_type(path) is not None
956 ):
965 ):
957 # do not trigger a fncache load when adding a file that already is
966 # do not trigger a fncache load when adding a file that already is
958 # known to exist.
967 # known to exist.
959 notload = self.fncache.entries is None and self.vfs.exists(encoded)
968 notload = self.fncache.entries is None and self.vfs.exists(encoded)
960 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
969 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
961 # when appending to an existing file, if the file has size zero,
970 # when appending to an existing file, if the file has size zero,
962 # it should be considered as missing. Such zero-size files are
971 # it should be considered as missing. Such zero-size files are
963 # the result of truncation when a transaction is aborted.
972 # the result of truncation when a transaction is aborted.
964 notload = False
973 notload = False
965 if not notload:
974 if not notload:
966 self.fncache.add(path)
975 self.fncache.add(path)
967 return self.vfs(encoded, mode, *args, **kw)
976 return self.vfs(encoded, mode, *args, **kw)
968
977
969 def join(self, path):
978 def join(self, path):
970 if path:
979 if path:
971 return self.vfs.join(self.encode(path))
980 return self.vfs.join(self.encode(path))
972 else:
981 else:
973 return self.vfs.join(path)
982 return self.vfs.join(path)
974
983
975 def register_file(self, path):
984 def register_file(self, path):
976 """generic hook point to lets fncache steer its stew"""
985 """generic hook point to lets fncache steer its stew"""
977 if path.startswith(b'data/') or path.startswith(b'meta/'):
986 if path.startswith(b'data/') or path.startswith(b'meta/'):
978 self.fncache.add(path)
987 self.fncache.add(path)
979
988
980
989
981 class fncachestore(basicstore):
990 class fncachestore(basicstore):
982 def __init__(self, path, vfstype, dotencode):
991 def __init__(self, path, vfstype, dotencode):
983 if dotencode:
992 if dotencode:
984 encode = _pathencode
993 encode = _pathencode
985 else:
994 else:
986 encode = _plainhybridencode
995 encode = _plainhybridencode
987 self.encode = encode
996 self.encode = encode
988 vfs = vfstype(path + b'/store')
997 vfs = vfstype(path + b'/store')
989 self.path = vfs.base
998 self.path = vfs.base
990 self.pathsep = self.path + b'/'
999 self.pathsep = self.path + b'/'
991 self.createmode = _calcmode(vfs)
1000 self.createmode = _calcmode(vfs)
992 vfs.createmode = self.createmode
1001 vfs.createmode = self.createmode
993 self.rawvfs = vfs
1002 self.rawvfs = vfs
994 fnc = fncache(vfs)
1003 fnc = fncache(vfs)
995 self.fncache = fnc
1004 self.fncache = fnc
996 self.vfs = _fncachevfs(vfs, fnc, encode)
1005 self.vfs = _fncachevfs(vfs, fnc, encode)
997 self.opener = self.vfs
1006 self.opener = self.vfs
998
1007
999 def join(self, f):
1008 def join(self, f):
1000 return self.pathsep + self.encode(f)
1009 return self.pathsep + self.encode(f)
1001
1010
1002 def getsize(self, path):
1011 def getsize(self, path):
1003 return self.rawvfs.stat(path).st_size
1012 return self.rawvfs.stat(path).st_size
1004
1013
1005 def data_entries(
1014 def data_entries(
1006 self, matcher=None, undecodable=None
1015 self, matcher=None, undecodable=None
1007 ) -> Generator[BaseStoreEntry, None, None]:
1016 ) -> Generator[BaseStoreEntry, None, None]:
1008 files = ((f, revlog_type(f)) for f in self.fncache)
1017 files = ((f, revlog_type(f)) for f in self.fncache)
1009 # Note: all files in fncache should be revlog related; however, the
1018 # Note: all files in fncache should be revlog related; however, the
1010 # fncache might contain non-revlog files added by a previous version of
1019 # fncache might contain non-revlog files added by a previous version of
1011 # Mercurial.
1020 # Mercurial.
1012 files = (f for f in files if f[1] is not None)
1021 files = (f for f in files if f[1] is not None)
1013 by_revlog = _gather_revlog(files)
1022 by_revlog = _gather_revlog(files)
1014 for revlog, details in by_revlog:
1023 for revlog, details in by_revlog:
1015 file_details = {}
1024 file_details = {}
1016 if revlog.startswith(b'data/'):
1025 if revlog.startswith(b'data/'):
1017 rl_type = FILEFLAGS_FILELOG
1026 rl_type = FILEFLAGS_FILELOG
1018 revlog_target_id = revlog.split(b'/', 1)[1]
1027 revlog_target_id = revlog.split(b'/', 1)[1]
1019 elif revlog.startswith(b'meta/'):
1028 elif revlog.startswith(b'meta/'):
1020 rl_type = FILEFLAGS_MANIFESTLOG
1029 rl_type = FILEFLAGS_MANIFESTLOG
1021 # drop the initial directory and the `00manifest` file part
1030 # drop the initial directory and the `00manifest` file part
1022 tmp = revlog.split(b'/', 1)[1]
1031 tmp = revlog.split(b'/', 1)[1]
1023 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1032 revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
1024 else:
1033 else:
1025 # unreachable
1034 # unreachable
1026 assert False, revlog
1035 assert False, revlog
1027 for ext, t in details.items():
1036 for ext, t in details.items():
1028 file_details[ext] = {
1037 file_details[ext] = {
1029 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1038 'is_volatile': bool(t & FILEFLAGS_VOLATILE),
1030 }
1039 }
1031 entry = RevlogStoreEntry(
1040 entry = RevlogStoreEntry(
1032 path_prefix=revlog,
1041 path_prefix=revlog,
1033 revlog_type=rl_type,
1042 revlog_type=rl_type,
1034 target_id=revlog_target_id,
1043 target_id=revlog_target_id,
1035 details=file_details,
1044 details=file_details,
1036 )
1045 )
1037 if _match_tracked_entry(entry, matcher):
1046 if _match_tracked_entry(entry, matcher):
1038 yield entry
1047 yield entry
1039
1048
1040 def copylist(self):
1049 def copylist(self):
1041 d = (
1050 d = (
1042 b'bookmarks',
1051 b'bookmarks',
1043 b'narrowspec',
1052 b'narrowspec',
1044 b'data',
1053 b'data',
1045 b'meta',
1054 b'meta',
1046 b'dh',
1055 b'dh',
1047 b'fncache',
1056 b'fncache',
1048 b'phaseroots',
1057 b'phaseroots',
1049 b'obsstore',
1058 b'obsstore',
1050 b'00manifest.d',
1059 b'00manifest.d',
1051 b'00manifest.i',
1060 b'00manifest.i',
1052 b'00changelog.d',
1061 b'00changelog.d',
1053 b'00changelog.i',
1062 b'00changelog.i',
1054 b'requires',
1063 b'requires',
1055 )
1064 )
1056 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1065 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
1057
1066
1058 def write(self, tr):
1067 def write(self, tr):
1059 self.fncache.write(tr)
1068 self.fncache.write(tr)
1060
1069
1061 def invalidatecaches(self):
1070 def invalidatecaches(self):
1062 self.fncache.entries = None
1071 self.fncache.entries = None
1063 self.fncache.addls = set()
1072 self.fncache.addls = set()
1064
1073
1065 def markremoved(self, fn):
1074 def markremoved(self, fn):
1066 self.fncache.remove(fn)
1075 self.fncache.remove(fn)
1067
1076
1068 def _exists(self, f):
1077 def _exists(self, f):
1069 ef = self.encode(f)
1078 ef = self.encode(f)
1070 try:
1079 try:
1071 self.getsize(ef)
1080 self.getsize(ef)
1072 return True
1081 return True
1073 except FileNotFoundError:
1082 except FileNotFoundError:
1074 return False
1083 return False
1075
1084
1076 def __contains__(self, path):
1085 def __contains__(self, path):
1077 '''Checks if the store contains path'''
1086 '''Checks if the store contains path'''
1078 path = b"/".join((b"data", path))
1087 path = b"/".join((b"data", path))
1079 # check for files (exact match)
1088 # check for files (exact match)
1080 e = path + b'.i'
1089 e = path + b'.i'
1081 if e in self.fncache and self._exists(e):
1090 if e in self.fncache and self._exists(e):
1082 return True
1091 return True
1083 # now check for directories (prefix match)
1092 # now check for directories (prefix match)
1084 if not path.endswith(b'/'):
1093 if not path.endswith(b'/'):
1085 path += b'/'
1094 path += b'/'
1086 for e in self.fncache:
1095 for e in self.fncache:
1087 if e.startswith(path) and self._exists(e):
1096 if e.startswith(path) and self._exists(e):
1088 return True
1097 return True
1089 return False
1098 return False
@@ -1,936 +1,936 b''
1 # streamclone.py - producing and consuming streaming repository data
1 # streamclone.py - producing and consuming streaming repository data
2 #
2 #
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
3 # Copyright 2015 Gregory Szorc <gregory.szorc@gmail.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import contextlib
9 import contextlib
10 import os
10 import os
11 import struct
11 import struct
12
12
13 from .i18n import _
13 from .i18n import _
14 from .pycompat import open
14 from .pycompat import open
15 from .interfaces import repository
15 from .interfaces import repository
16 from . import (
16 from . import (
17 bookmarks,
17 bookmarks,
18 cacheutil,
18 cacheutil,
19 error,
19 error,
20 narrowspec,
20 narrowspec,
21 phases,
21 phases,
22 pycompat,
22 pycompat,
23 requirements as requirementsmod,
23 requirements as requirementsmod,
24 scmutil,
24 scmutil,
25 store,
25 store,
26 transaction,
26 transaction,
27 util,
27 util,
28 )
28 )
29 from .revlogutils import (
29 from .revlogutils import (
30 nodemap,
30 nodemap,
31 )
31 )
32
32
33
33
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
34 def new_stream_clone_requirements(default_requirements, streamed_requirements):
35 """determine the final set of requirement for a new stream clone
35 """determine the final set of requirement for a new stream clone
36
36
37 this method combines the "default" requirements that a new repository would
37 this method combines the "default" requirements that a new repository would
38 use with the constraints we get from the stream clone content. We keep local
38 use with the constraints we get from the stream clone content. We keep local
39 configuration choices when possible.
39 configuration choices when possible.
40 """
40 """
41 requirements = set(default_requirements)
41 requirements = set(default_requirements)
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
42 requirements -= requirementsmod.STREAM_FIXED_REQUIREMENTS
43 requirements.update(streamed_requirements)
43 requirements.update(streamed_requirements)
44 return requirements
44 return requirements
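# Editor's illustration with made-up requirement names: if the local defaults
# are {b'fixed-a', b'local-b'} and only b'fixed-a' belongs to
# STREAM_FIXED_REQUIREMENTS, while the streamed content carries {b'fixed-c'},
# the local non-fixed choice is kept and the streamed fixed one is adopted:
#
#     new_stream_clone_requirements({b'fixed-a', b'local-b'}, {b'fixed-c'})
#     # -> {b'local-b', b'fixed-c'}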
45
45
46
46
47 def streamed_requirements(repo):
47 def streamed_requirements(repo):
48 """the set of requirement the new clone will have to support
48 """the set of requirement the new clone will have to support
49
49
50 This is used for advertising the stream options and to generate the actual
50 This is used for advertising the stream options and to generate the actual
51 stream content."""
51 stream content."""
52 requiredformats = (
52 requiredformats = (
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
53 repo.requirements & requirementsmod.STREAM_FIXED_REQUIREMENTS
54 )
54 )
55 return requiredformats
55 return requiredformats
56
56
57
57
58 def canperformstreamclone(pullop, bundle2=False):
58 def canperformstreamclone(pullop, bundle2=False):
59 """Whether it is possible to perform a streaming clone as part of pull.
59 """Whether it is possible to perform a streaming clone as part of pull.
60
60
61 ``bundle2`` will cause the function to consider stream clone through
61 ``bundle2`` will cause the function to consider stream clone through
62 bundle2 and only through bundle2.
62 bundle2 and only through bundle2.
63
63
64 Returns a tuple of (supported, requirements). ``supported`` is True if
64 Returns a tuple of (supported, requirements). ``supported`` is True if
65 streaming clone is supported and False otherwise. ``requirements`` is
65 streaming clone is supported and False otherwise. ``requirements`` is
66 a set of repo requirements from the remote, or ``None`` if stream clone
66 a set of repo requirements from the remote, or ``None`` if stream clone
67 isn't supported.
67 isn't supported.
68 """
68 """
69 repo = pullop.repo
69 repo = pullop.repo
70 remote = pullop.remote
70 remote = pullop.remote
71
71
72 bundle2supported = False
72 bundle2supported = False
73 if pullop.canusebundle2:
73 if pullop.canusebundle2:
74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
74 if b'v2' in pullop.remotebundle2caps.get(b'stream', []):
75 bundle2supported = True
75 bundle2supported = True
76 # else
76 # else
77 # Server doesn't support bundle2 stream clone or doesn't support
77 # Server doesn't support bundle2 stream clone or doesn't support
78 # the versions we support. Fall back and possibly allow legacy.
78 # the versions we support. Fall back and possibly allow legacy.
79
79
80 # Ensures legacy code path uses available bundle2.
80 # Ensures legacy code path uses available bundle2.
81 if bundle2supported and not bundle2:
81 if bundle2supported and not bundle2:
82 return False, None
82 return False, None
83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
83 # Ensures bundle2 doesn't try to do a stream clone if it isn't supported.
84 elif bundle2 and not bundle2supported:
84 elif bundle2 and not bundle2supported:
85 return False, None
85 return False, None
86
86
87 # Streaming clone only works on empty repositories.
87 # Streaming clone only works on empty repositories.
88 if len(repo):
88 if len(repo):
89 return False, None
89 return False, None
90
90
91 # Streaming clone only works if all data is being requested.
91 # Streaming clone only works if all data is being requested.
92 if pullop.heads:
92 if pullop.heads:
93 return False, None
93 return False, None
94
94
95 streamrequested = pullop.streamclonerequested
95 streamrequested = pullop.streamclonerequested
96
96
97 # If we don't have a preference, let the server decide for us. This
97 # If we don't have a preference, let the server decide for us. This
98 # likely only comes into play in LANs.
98 # likely only comes into play in LANs.
99 if streamrequested is None:
99 if streamrequested is None:
100 # The server can advertise whether to prefer streaming clone.
100 # The server can advertise whether to prefer streaming clone.
101 streamrequested = remote.capable(b'stream-preferred')
101 streamrequested = remote.capable(b'stream-preferred')
102
102
103 if not streamrequested:
103 if not streamrequested:
104 return False, None
104 return False, None
105
105
106 # In order for stream clone to work, the client has to support all the
106 # In order for stream clone to work, the client has to support all the
107 # requirements advertised by the server.
107 # requirements advertised by the server.
108 #
108 #
109 # The server advertises its requirements via the "stream" and "streamreqs"
109 # The server advertises its requirements via the "stream" and "streamreqs"
110 # capability. "stream" (a value-less capability) is advertised if and only
110 # capability. "stream" (a value-less capability) is advertised if and only
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
111 # if the only requirement is "revlogv1." Else, the "streamreqs" capability
112 # is advertised and contains a comma-delimited list of requirements.
112 # is advertised and contains a comma-delimited list of requirements.
113 requirements = set()
113 requirements = set()
114 if remote.capable(b'stream'):
114 if remote.capable(b'stream'):
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
115 requirements.add(requirementsmod.REVLOGV1_REQUIREMENT)
116 else:
116 else:
117 streamreqs = remote.capable(b'streamreqs')
117 streamreqs = remote.capable(b'streamreqs')
118 # This is weird and shouldn't happen with modern servers.
118 # This is weird and shouldn't happen with modern servers.
119 if not streamreqs:
119 if not streamreqs:
120 pullop.repo.ui.warn(
120 pullop.repo.ui.warn(
121 _(
121 _(
122 b'warning: stream clone requested but server has them '
122 b'warning: stream clone requested but server has them '
123 b'disabled\n'
123 b'disabled\n'
124 )
124 )
125 )
125 )
126 return False, None
126 return False, None
127
127
128 streamreqs = set(streamreqs.split(b','))
128 streamreqs = set(streamreqs.split(b','))
129 # Server requires something we don't support. Bail.
129 # Server requires something we don't support. Bail.
130 missingreqs = streamreqs - repo.supported
130 missingreqs = streamreqs - repo.supported
131 if missingreqs:
131 if missingreqs:
132 pullop.repo.ui.warn(
132 pullop.repo.ui.warn(
133 _(
133 _(
134 b'warning: stream clone requested but client is missing '
134 b'warning: stream clone requested but client is missing '
135 b'requirements: %s\n'
135 b'requirements: %s\n'
136 )
136 )
137 % b', '.join(sorted(missingreqs))
137 % b', '.join(sorted(missingreqs))
138 )
138 )
139 pullop.repo.ui.warn(
139 pullop.repo.ui.warn(
140 _(
140 _(
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
141 b'(see https://www.mercurial-scm.org/wiki/MissingRequirement '
142 b'for more information)\n'
142 b'for more information)\n'
143 )
143 )
144 )
144 )
145 return False, None
145 return False, None
146 requirements = streamreqs
146 requirements = streamreqs
147
147
148 return True, requirements
148 return True, requirements
149
149
150
150
151 def maybeperformlegacystreamclone(pullop):
151 def maybeperformlegacystreamclone(pullop):
152 """Possibly perform a legacy stream clone operation.
152 """Possibly perform a legacy stream clone operation.
153
153
154 Legacy stream clones are performed as part of pull but before all other
154 Legacy stream clones are performed as part of pull but before all other
155 operations.
155 operations.
156
156
157 A legacy stream clone will not be performed if a bundle2 stream clone is
157 A legacy stream clone will not be performed if a bundle2 stream clone is
158 supported.
158 supported.
159 """
159 """
160 from . import localrepo
160 from . import localrepo
161
161
162 supported, requirements = canperformstreamclone(pullop)
162 supported, requirements = canperformstreamclone(pullop)
163
163
164 if not supported:
164 if not supported:
165 return
165 return
166
166
167 repo = pullop.repo
167 repo = pullop.repo
168 remote = pullop.remote
168 remote = pullop.remote
169
169
170 # Save remote branchmap. We will use it later to speed up branchcache
170 # Save remote branchmap. We will use it later to speed up branchcache
171 # creation.
171 # creation.
172 rbranchmap = None
172 rbranchmap = None
173 if remote.capable(b'branchmap'):
173 if remote.capable(b'branchmap'):
174 with remote.commandexecutor() as e:
174 with remote.commandexecutor() as e:
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
175 rbranchmap = e.callcommand(b'branchmap', {}).result()
176
176
177 repo.ui.status(_(b'streaming all changes\n'))
177 repo.ui.status(_(b'streaming all changes\n'))
178
178
179 with remote.commandexecutor() as e:
179 with remote.commandexecutor() as e:
180 fp = e.callcommand(b'stream_out', {}).result()
180 fp = e.callcommand(b'stream_out', {}).result()
181
181
182 # TODO strictly speaking, this code should all be inside the context
182 # TODO strictly speaking, this code should all be inside the context
183 # manager because the context manager is supposed to ensure all wire state
183 # manager because the context manager is supposed to ensure all wire state
184 # is flushed when exiting. But the legacy peers don't do this, so it
184 # is flushed when exiting. But the legacy peers don't do this, so it
185 # doesn't matter.
185 # doesn't matter.
186 l = fp.readline()
186 l = fp.readline()
187 try:
187 try:
188 resp = int(l)
188 resp = int(l)
189 except ValueError:
189 except ValueError:
190 raise error.ResponseError(
190 raise error.ResponseError(
191 _(b'unexpected response from remote server:'), l
191 _(b'unexpected response from remote server:'), l
192 )
192 )
193 if resp == 1:
193 if resp == 1:
194 raise error.Abort(_(b'operation forbidden by server'))
194 raise error.Abort(_(b'operation forbidden by server'))
195 elif resp == 2:
195 elif resp == 2:
196 raise error.Abort(_(b'locking the remote repository failed'))
196 raise error.Abort(_(b'locking the remote repository failed'))
197 elif resp != 0:
197 elif resp != 0:
198 raise error.Abort(_(b'the server sent an unknown error code'))
198 raise error.Abort(_(b'the server sent an unknown error code'))
199
199
200 l = fp.readline()
200 l = fp.readline()
201 try:
201 try:
202 filecount, bytecount = map(int, l.split(b' ', 1))
202 filecount, bytecount = map(int, l.split(b' ', 1))
203 except (ValueError, TypeError):
203 except (ValueError, TypeError):
204 raise error.ResponseError(
204 raise error.ResponseError(
205 _(b'unexpected response from remote server:'), l
205 _(b'unexpected response from remote server:'), l
206 )
206 )
207
207
208 with repo.lock():
208 with repo.lock():
209 consumev1(repo, fp, filecount, bytecount)
209 consumev1(repo, fp, filecount, bytecount)
210 repo.requirements = new_stream_clone_requirements(
210 repo.requirements = new_stream_clone_requirements(
211 repo.requirements,
211 repo.requirements,
212 requirements,
212 requirements,
213 )
213 )
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
214 repo.svfs.options = localrepo.resolvestorevfsoptions(
215 repo.ui, repo.requirements, repo.features
215 repo.ui, repo.requirements, repo.features
216 )
216 )
217 scmutil.writereporequirements(repo)
217 scmutil.writereporequirements(repo)
218 nodemap.post_stream_cleanup(repo)
218 nodemap.post_stream_cleanup(repo)
219
219
220 if rbranchmap:
220 if rbranchmap:
221 repo._branchcaches.replace(repo, rbranchmap)
221 repo._branchcaches.replace(repo, rbranchmap)
222
222
223 repo.invalidate()
223 repo.invalidate()
224
224
225
225
226 def allowservergeneration(repo):
226 def allowservergeneration(repo):
227 """Whether streaming clones are allowed from the server."""
227 """Whether streaming clones are allowed from the server."""
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
228 if repository.REPO_FEATURE_STREAM_CLONE not in repo.features:
229 return False
229 return False
230
230
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
231 if not repo.ui.configbool(b'server', b'uncompressed', untrusted=True):
232 return False
232 return False
233
233
234 # The way stream clone works makes it impossible to hide secret changesets.
234 # The way stream clone works makes it impossible to hide secret changesets.
235 # So don't allow this by default.
235 # So don't allow this by default.
236 secret = phases.hassecret(repo)
236 secret = phases.hassecret(repo)
237 if secret:
237 if secret:
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
238 return repo.ui.configbool(b'server', b'uncompressedallowsecret')
239
239
240 return True
240 return True
241
241
242
242
243 # This is its own function so extensions can override it.
243 # This is its own function so extensions can override it.
244 def _walkstreamfiles(repo, matcher=None, phase=False):
244 def _walkstreamfiles(repo, matcher=None, phase=False, obsolescence=False):
245 return repo.store.walk(matcher, phase=phase)
245 return repo.store.walk(matcher, phase=phase, obsolescence=obsolescence)
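# Editor's sketch (hypothetical extension code, not part of this change): an
# extension wrapping this hook would need to accept and forward the new
# keyword as well, e.g.
#
#     def wrapped_walkstreamfiles(orig, repo, matcher=None, phase=False,
#                                 obsolescence=False):
#         return orig(repo, matcher, phase=phase, obsolescence=obsolescence)
#
#     extensions.wrapfunction(
#         streamclone, '_walkstreamfiles', wrapped_walkstreamfiles
#     )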
246
246
247
247
248 def generatev1(repo):
248 def generatev1(repo):
249 """Emit content for version 1 of a streaming clone.
249 """Emit content for version 1 of a streaming clone.
250
250
251 This returns a 3-tuple of (file count, byte size, data iterator).
251 This returns a 3-tuple of (file count, byte size, data iterator).
252
252
253 The data iterator consists of N entries for each file being transferred.
253 The data iterator consists of N entries for each file being transferred.
254 Each file entry starts as a line with the file name and integer size
254 Each file entry starts as a line with the file name and integer size
255 delimited by a null byte.
255 delimited by a null byte.
256
256
257 The raw file data follows. Following the raw file data is the next file
257 The raw file data follows. Following the raw file data is the next file
258 entry, or EOF.
258 entry, or EOF.
259
259
260 When used on the wire protocol, an additional line indicating protocol
260 When used on the wire protocol, an additional line indicating protocol
261 success will be prepended to the stream. This function is not responsible
261 success will be prepended to the stream. This function is not responsible
262 for adding it.
262 for adding it.
263
263
264 This function will obtain a repository lock to ensure a consistent view of
264 This function will obtain a repository lock to ensure a consistent view of
265 the store is captured. It therefore may raise LockError.
265 the store is captured. It therefore may raise LockError.
266 """
266 """
267 entries = []
267 entries = []
268 total_bytes = 0
268 total_bytes = 0
269 # Get consistent snapshot of repo, lock during scan.
269 # Get consistent snapshot of repo, lock during scan.
270 with repo.lock():
270 with repo.lock():
271 repo.ui.debug(b'scanning\n')
271 repo.ui.debug(b'scanning\n')
272 for entry in _walkstreamfiles(repo):
272 for entry in _walkstreamfiles(repo):
273 for f in entry.files():
273 for f in entry.files():
274 file_size = f.file_size(repo.store.vfs)
274 file_size = f.file_size(repo.store.vfs)
275 if file_size:
275 if file_size:
276 entries.append((f.unencoded_path, file_size))
276 entries.append((f.unencoded_path, file_size))
277 total_bytes += file_size
277 total_bytes += file_size
278 _test_sync_point_walk_1(repo)
278 _test_sync_point_walk_1(repo)
279 _test_sync_point_walk_2(repo)
279 _test_sync_point_walk_2(repo)
280
280
281 repo.ui.debug(
281 repo.ui.debug(
282 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
282 b'%d files, %d bytes to transfer\n' % (len(entries), total_bytes)
283 )
283 )
284
284
285 svfs = repo.svfs
285 svfs = repo.svfs
286 debugflag = repo.ui.debugflag
286 debugflag = repo.ui.debugflag
287
287
288 def emitrevlogdata():
288 def emitrevlogdata():
289 for name, size in entries:
289 for name, size in entries:
290 if debugflag:
290 if debugflag:
291 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
291 repo.ui.debug(b'sending %s (%d bytes)\n' % (name, size))
292 # partially encode name over the wire for backwards compat
292 # partially encode name over the wire for backwards compat
293 yield b'%s\0%d\n' % (store.encodedir(name), size)
293 yield b'%s\0%d\n' % (store.encodedir(name), size)
294 # auditing at this stage is both pointless (paths are already
294 # auditing at this stage is both pointless (paths are already
295 # trusted by the local repo) and expensive
295 # trusted by the local repo) and expensive
296 with svfs(name, b'rb', auditpath=False) as fp:
296 with svfs(name, b'rb', auditpath=False) as fp:
297 if size <= 65536:
297 if size <= 65536:
298 yield fp.read(size)
298 yield fp.read(size)
299 else:
299 else:
300 for chunk in util.filechunkiter(fp, limit=size):
300 for chunk in util.filechunkiter(fp, limit=size):
301 yield chunk
301 yield chunk
302
302
303 return len(entries), total_bytes, emitrevlogdata()
303 return len(entries), total_bytes, emitrevlogdata()
304
304
305
305
306 def generatev1wireproto(repo):
306 def generatev1wireproto(repo):
307 """Emit content for version 1 of streaming clone suitable for the wire.
307 """Emit content for version 1 of streaming clone suitable for the wire.
308
308
309 This is the data output from ``generatev1()`` with 2 header lines. The
309 This is the data output from ``generatev1()`` with 2 header lines. The
310 first line indicates overall success. The 2nd contains the file count and
310 first line indicates overall success. The 2nd contains the file count and
311 byte size of payload.
311 byte size of payload.
312
312
313 The success line contains "0" for success, "1" for stream generation not
313 The success line contains "0" for success, "1" for stream generation not
314 allowed, and "2" for error locking the repository (possibly indicating
314 allowed, and "2" for error locking the repository (possibly indicating
315 a permissions error for the server process).
315 a permissions error for the server process).
316 """
316 """
317 if not allowservergeneration(repo):
317 if not allowservergeneration(repo):
318 yield b'1\n'
318 yield b'1\n'
319 return
319 return
320
320
321 try:
321 try:
322 filecount, bytecount, it = generatev1(repo)
322 filecount, bytecount, it = generatev1(repo)
323 except error.LockError:
323 except error.LockError:
324 yield b'2\n'
324 yield b'2\n'
325 return
325 return
326
326
327 # Indicates successful response.
327 # Indicates successful response.
328 yield b'0\n'
328 yield b'0\n'
329 yield b'%d %d\n' % (filecount, bytecount)
329 yield b'%d %d\n' % (filecount, bytecount)
330 for chunk in it:
330 for chunk in it:
331 yield chunk
331 yield chunk
332
332
333
333
334 def generatebundlev1(repo, compression=b'UN'):
334 def generatebundlev1(repo, compression=b'UN'):
335 """Emit content for version 1 of a stream clone bundle.
335 """Emit content for version 1 of a stream clone bundle.
336
336
337 The first 4 bytes of the output ("HGS1") denote this as stream clone
337 The first 4 bytes of the output ("HGS1") denote this as stream clone
338 bundle version 1.
338 bundle version 1.
339
339
340 The next 2 bytes indicate the compression type. Only "UN" is currently
340 The next 2 bytes indicate the compression type. Only "UN" is currently
341 supported.
341 supported.
342
342
343 The next 16 bytes are two 64-bit big endian unsigned integers indicating
343 The next 16 bytes are two 64-bit big endian unsigned integers indicating
344 file count and byte count, respectively.
344 file count and byte count, respectively.
345
345
346 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
346 The next 2 bytes is a 16-bit big endian unsigned short declaring the length
347 of the requirements string, including a trailing \0. The following N bytes
347 of the requirements string, including a trailing \0. The following N bytes
348 are the requirements string, which is ASCII containing a comma-delimited
348 are the requirements string, which is ASCII containing a comma-delimited
349 list of repo requirements that are needed to support the data.
349 list of repo requirements that are needed to support the data.
350
350
351 The remaining content is the output of ``generatev1()`` (which may be
351 The remaining content is the output of ``generatev1()`` (which may be
352 compressed in the future).
352 compressed in the future).
353
353
354 Returns a tuple of (requirements, data generator).
354 Returns a tuple of (requirements, data generator).
355 """
355 """
356 if compression != b'UN':
356 if compression != b'UN':
357 raise ValueError(b'we do not support the compression argument yet')
357 raise ValueError(b'we do not support the compression argument yet')
358
358
359 requirements = streamed_requirements(repo)
359 requirements = streamed_requirements(repo)
360 requires = b','.join(sorted(requirements))
360 requires = b','.join(sorted(requirements))
361
361
362 def gen():
362 def gen():
363 yield b'HGS1'
363 yield b'HGS1'
364 yield compression
364 yield compression
365
365
366 filecount, bytecount, it = generatev1(repo)
366 filecount, bytecount, it = generatev1(repo)
367 repo.ui.status(
367 repo.ui.status(
368 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
368 _(b'writing %d bytes for %d files\n') % (bytecount, filecount)
369 )
369 )
370
370
371 yield struct.pack(b'>QQ', filecount, bytecount)
371 yield struct.pack(b'>QQ', filecount, bytecount)
372 yield struct.pack(b'>H', len(requires) + 1)
372 yield struct.pack(b'>H', len(requires) + 1)
373 yield requires + b'\0'
373 yield requires + b'\0'
374
374
375 # This is where we'll add compression in the future.
375 # This is where we'll add compression in the future.
376 assert compression == b'UN'
376 assert compression == b'UN'
377
377
378 progress = repo.ui.makeprogress(
378 progress = repo.ui.makeprogress(
379 _(b'bundle'), total=bytecount, unit=_(b'bytes')
379 _(b'bundle'), total=bytecount, unit=_(b'bytes')
380 )
380 )
381 progress.update(0)
381 progress.update(0)
382
382
383 for chunk in it:
383 for chunk in it:
384 progress.increment(step=len(chunk))
384 progress.increment(step=len(chunk))
385 yield chunk
385 yield chunk
386
386
387 progress.complete()
387 progress.complete()
388
388
389 return requirements, gen()
389 return requirements, gen()
390
390
391
391
392 def consumev1(repo, fp, filecount, bytecount):
392 def consumev1(repo, fp, filecount, bytecount):
393 """Apply the contents from version 1 of a streaming clone file handle.
393 """Apply the contents from version 1 of a streaming clone file handle.
394
394
395 This takes the output from "stream_out" and applies it to the specified
395 This takes the output from "stream_out" and applies it to the specified
396 repository.
396 repository.
397
397
398 Like "stream_out," the status line added by the wire protocol is not
398 Like "stream_out," the status line added by the wire protocol is not
399 handled by this function.
399 handled by this function.
400 """
400 """
401 with repo.lock():
401 with repo.lock():
402 repo.ui.status(
402 repo.ui.status(
403 _(b'%d files to transfer, %s of data\n')
403 _(b'%d files to transfer, %s of data\n')
404 % (filecount, util.bytecount(bytecount))
404 % (filecount, util.bytecount(bytecount))
405 )
405 )
406 progress = repo.ui.makeprogress(
406 progress = repo.ui.makeprogress(
407 _(b'clone'), total=bytecount, unit=_(b'bytes')
407 _(b'clone'), total=bytecount, unit=_(b'bytes')
408 )
408 )
409 progress.update(0)
409 progress.update(0)
410 start = util.timer()
410 start = util.timer()
411
411
412 # TODO: get rid of (potential) inconsistency
412 # TODO: get rid of (potential) inconsistency
413 #
413 #
414 # If transaction is started and any @filecache property is
414 # If transaction is started and any @filecache property is
415 # changed at this point, it causes inconsistency between
415 # changed at this point, it causes inconsistency between
416 # in-memory cached property and streamclone-ed file on the
416 # in-memory cached property and streamclone-ed file on the
417 # disk. Nested transaction prevents transaction scope "clone"
417 # disk. Nested transaction prevents transaction scope "clone"
418 # below from writing in-memory changes out at the end of it,
418 # below from writing in-memory changes out at the end of it,
419 # even though in-memory changes are discarded at the end of it
419 # even though in-memory changes are discarded at the end of it
420 # regardless of transaction nesting.
420 # regardless of transaction nesting.
421 #
421 #
422 # But transaction nesting can't be simply prohibited, because
422 # But transaction nesting can't be simply prohibited, because
423 # nesting occurs also in ordinary case (e.g. enabling
423 # nesting occurs also in ordinary case (e.g. enabling
424 # clonebundles).
424 # clonebundles).
425
425
426 with repo.transaction(b'clone'):
426 with repo.transaction(b'clone'):
427 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
427 with repo.svfs.backgroundclosing(repo.ui, expectedcount=filecount):
428 for i in range(filecount):
428 for i in range(filecount):
429 # XXX doesn't support '\n' or '\r' in filenames
429 # XXX doesn't support '\n' or '\r' in filenames
430 l = fp.readline()
430 l = fp.readline()
431 try:
431 try:
432 name, size = l.split(b'\0', 1)
432 name, size = l.split(b'\0', 1)
433 size = int(size)
433 size = int(size)
434 except (ValueError, TypeError):
434 except (ValueError, TypeError):
435 raise error.ResponseError(
435 raise error.ResponseError(
436 _(b'unexpected response from remote server:'), l
436 _(b'unexpected response from remote server:'), l
437 )
437 )
438 if repo.ui.debugflag:
438 if repo.ui.debugflag:
439 repo.ui.debug(
439 repo.ui.debug(
440 b'adding %s (%s)\n' % (name, util.bytecount(size))
440 b'adding %s (%s)\n' % (name, util.bytecount(size))
441 )
441 )
442 # for backwards compat, name was partially encoded
442 # for backwards compat, name was partially encoded
443 path = store.decodedir(name)
443 path = store.decodedir(name)
444 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
444 with repo.svfs(path, b'w', backgroundclose=True) as ofp:
445 for chunk in util.filechunkiter(fp, limit=size):
445 for chunk in util.filechunkiter(fp, limit=size):
446 progress.increment(step=len(chunk))
446 progress.increment(step=len(chunk))
447 ofp.write(chunk)
447 ofp.write(chunk)
448
448
449 # force @filecache properties to be reloaded from
449 # force @filecache properties to be reloaded from
450 # streamclone-ed file at next access
450 # streamclone-ed file at next access
451 repo.invalidate(clearfilecache=True)
451 repo.invalidate(clearfilecache=True)
452
452
453 elapsed = util.timer() - start
453 elapsed = util.timer() - start
454 if elapsed <= 0:
454 if elapsed <= 0:
455 elapsed = 0.001
455 elapsed = 0.001
456 progress.complete()
456 progress.complete()
457 repo.ui.status(
457 repo.ui.status(
458 _(b'transferred %s in %.1f seconds (%s/sec)\n')
458 _(b'transferred %s in %.1f seconds (%s/sec)\n')
459 % (
459 % (
460 util.bytecount(bytecount),
460 util.bytecount(bytecount),
461 elapsed,
461 elapsed,
462 util.bytecount(bytecount / elapsed),
462 util.bytecount(bytecount / elapsed),
463 )
463 )
464 )
464 )
465
465
466
466
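# Illustrative sketch, not part of Mercurial: the per-file entry layout
# that consumev1() above reads is a "name\0size\n" header line followed by
# `size` raw bytes. The file name and payload used below are made up.
def _sketch_v1_entry(name, data):
    """return one stream_out-style entry for `name` carrying `data`"""
    header = b'%s\0%d\n' % (name, len(data))
    return header + data

# e.g. _sketch_v1_entry(b'data/foo.i', b'\0' * 12) yields the header
# b'data/foo.i\x0012\n' followed by the 12 payload bytes.
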
467 def readbundle1header(fp):
467 def readbundle1header(fp):
468 compression = fp.read(2)
468 compression = fp.read(2)
469 if compression != b'UN':
469 if compression != b'UN':
470 raise error.Abort(
470 raise error.Abort(
471 _(
471 _(
472 b'only uncompressed stream clone bundles are '
472 b'only uncompressed stream clone bundles are '
473 b'supported; got %s'
473 b'supported; got %s'
474 )
474 )
475 % compression
475 % compression
476 )
476 )
477
477
478 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
478 filecount, bytecount = struct.unpack(b'>QQ', fp.read(16))
479 requireslen = struct.unpack(b'>H', fp.read(2))[0]
479 requireslen = struct.unpack(b'>H', fp.read(2))[0]
480 requires = fp.read(requireslen)
480 requires = fp.read(requireslen)
481
481
482 if not requires.endswith(b'\0'):
482 if not requires.endswith(b'\0'):
483 raise error.Abort(
483 raise error.Abort(
484 _(
484 _(
485 b'malformed stream clone bundle: '
485 b'malformed stream clone bundle: '
486 b'requirements not properly encoded'
486 b'requirements not properly encoded'
487 )
487 )
488 )
488 )
489
489
490 requirements = set(requires.rstrip(b'\0').split(b','))
490 requirements = set(requires.rstrip(b'\0').split(b','))
491
491
492 return filecount, bytecount, requirements
492 return filecount, bytecount, requirements
493
493
494
494
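# Illustrative sketch, not part of Mercurial: composing the uncompressed
# stream clone bundle v1 header that readbundle1header() above parses. The
# requirement names in the example are placeholders; `struct` is already
# imported at the top of this module.
def _sketch_bundle1_header(filecount, bytecount, requirements):
    """return header bytes: b'UN', >QQ counts, >H length, requirements + NUL"""
    requires = b','.join(sorted(requirements)) + b'\0'
    return (
        b'UN'  # compression marker: uncompressed
        + struct.pack(b'>QQ', filecount, bytecount)
        + struct.pack(b'>H', len(requires))
        + requires
    )

# e.g. _sketch_bundle1_header(1, 42, {b'revlogv1', b'store'})
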
495 def applybundlev1(repo, fp):
495 def applybundlev1(repo, fp):
496 """Apply the content from a stream clone bundle version 1.
496 """Apply the content from a stream clone bundle version 1.
497
497
498 We assume the 4 byte header has been read and validated and the file handle
498 We assume the 4 byte header has been read and validated and the file handle
499 is at the 2 byte compression identifier.
499 is at the 2 byte compression identifier.
500 """
500 """
501 if len(repo):
501 if len(repo):
502 raise error.Abort(
502 raise error.Abort(
503 _(b'cannot apply stream clone bundle on non-empty repo')
503 _(b'cannot apply stream clone bundle on non-empty repo')
504 )
504 )
505
505
506 filecount, bytecount, requirements = readbundle1header(fp)
506 filecount, bytecount, requirements = readbundle1header(fp)
507 missingreqs = requirements - repo.supported
507 missingreqs = requirements - repo.supported
508 if missingreqs:
508 if missingreqs:
509 raise error.Abort(
509 raise error.Abort(
510 _(b'unable to apply stream clone: unsupported format: %s')
510 _(b'unable to apply stream clone: unsupported format: %s')
511 % b', '.join(sorted(missingreqs))
511 % b', '.join(sorted(missingreqs))
512 )
512 )
513
513
514 consumev1(repo, fp, filecount, bytecount)
514 consumev1(repo, fp, filecount, bytecount)
515 nodemap.post_stream_cleanup(repo)
515 nodemap.post_stream_cleanup(repo)
516
516
517
517
518 class streamcloneapplier:
518 class streamcloneapplier:
519 """Class to manage applying streaming clone bundles.
519 """Class to manage applying streaming clone bundles.
520
520
521 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
521 We need to wrap ``applybundlev1()`` in a dedicated type to enable bundle
522 readers to perform bundle type-specific functionality.
522 readers to perform bundle type-specific functionality.
523 """
523 """
524
524
525 def __init__(self, fh):
525 def __init__(self, fh):
526 self._fh = fh
526 self._fh = fh
527
527
528 def apply(self, repo):
528 def apply(self, repo):
529 return applybundlev1(repo, self._fh)
529 return applybundlev1(repo, self._fh)
530
530
531
531
532 # type of file to stream
532 # type of file to stream
533 _fileappend = 0 # append only file
533 _fileappend = 0 # append only file
534 _filefull = 1 # full snapshot file
534 _filefull = 1 # full snapshot file
535
535
536 # Source of the file
536 # Source of the file
537 _srcstore = b's' # store (svfs)
537 _srcstore = b's' # store (svfs)
538 _srccache = b'c' # cache (cache)
538 _srccache = b'c' # cache (cache)
539
539
540 # This is its own function so extensions can override it.
540 # This is its own function so extensions can override it.
541 def _walkstreamfullstorefiles(repo):
541 def _walkstreamfullstorefiles(repo):
542 """list snapshot files from the store"""
542 """list snapshot files from the store"""
543 fnames = []
543 fnames = []
544 if not repo.publishing():
544 if not repo.publishing():
545 fnames.append(b'phaseroots')
545 fnames.append(b'phaseroots')
546 return fnames
546 return fnames
547
547
548
548
549 def _filterfull(entry, copy, vfsmap):
549 def _filterfull(entry, copy, vfsmap):
550 """actually copy the snapshot files"""
550 """actually copy the snapshot files"""
551 src, name, ftype, data = entry
551 src, name, ftype, data = entry
552 if ftype != _filefull:
552 if ftype != _filefull:
553 return entry
553 return entry
554 return (src, name, ftype, copy(vfsmap[src].join(name)))
554 return (src, name, ftype, copy(vfsmap[src].join(name)))
555
555
556
556
557 @contextlib.contextmanager
557 @contextlib.contextmanager
558 def maketempcopies():
558 def maketempcopies():
559 """return a function to temporarily copy files"""
559 """return a function to temporarily copy files"""
560
560
561 files = []
561 files = []
562 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
562 dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
563 try:
563 try:
564
564
565 def copy(src):
565 def copy(src):
566 fd, dst = pycompat.mkstemp(
566 fd, dst = pycompat.mkstemp(
567 prefix=os.path.basename(src), dir=dst_dir
567 prefix=os.path.basename(src), dir=dst_dir
568 )
568 )
569 os.close(fd)
569 os.close(fd)
570 files.append(dst)
570 files.append(dst)
571 util.copyfiles(src, dst, hardlink=True)
571 util.copyfiles(src, dst, hardlink=True)
572 return dst
572 return dst
573
573
574 yield copy
574 yield copy
575 finally:
575 finally:
576 for tmp in files:
576 for tmp in files:
577 util.tryunlink(tmp)
577 util.tryunlink(tmp)
578 util.tryrmdir(dst_dir)
578 util.tryrmdir(dst_dir)
579
579
580
580
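# Illustrative usage sketch, not part of Mercurial, for maketempcopies()
# above: the returned `copy` callable hardlinks (or copies) a file into a
# private temporary directory and returns the temporary path; every copy is
# removed when the context exits. A bytes path is assumed.
def _sketch_snapshot_file(path):
    """read a stable snapshot of `path` through a temporary copy"""
    with maketempcopies() as copy:
        tmp = copy(path)
        with open(tmp, b'rb') as fp:
            return fp.read()
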
581 def _makemap(repo):
581 def _makemap(repo):
582 """make a (src -> vfs) map for the repo"""
582 """make a (src -> vfs) map for the repo"""
583 vfsmap = {
583 vfsmap = {
584 _srcstore: repo.svfs,
584 _srcstore: repo.svfs,
585 _srccache: repo.cachevfs,
585 _srccache: repo.cachevfs,
586 }
586 }
587 # we keep repo.vfs out of the map on purpose; there are too many dangers there
587 # we keep repo.vfs out of the map on purpose; there are too many dangers there
588 # (eg: .hg/hgrc)
588 # (eg: .hg/hgrc)
589 assert repo.vfs not in vfsmap.values()
589 assert repo.vfs not in vfsmap.values()
590
590
591 return vfsmap
591 return vfsmap
592
592
593
593
594 def _emit2(repo, entries, totalfilesize):
594 def _emit2(repo, entries, totalfilesize):
595 """actually emit the stream bundle"""
595 """actually emit the stream bundle"""
596 vfsmap = _makemap(repo)
596 vfsmap = _makemap(repo)
597 # we keep repo.vfs out of the map on purpose; there are too many dangers there
597 # we keep repo.vfs out of the map on purpose; there are too many dangers there
598 # (eg: .hg/hgrc),
598 # (eg: .hg/hgrc),
599 #
599 #
600 # this assert is duplicated (from _makemap) as an author might think this is
600 # this assert is duplicated (from _makemap) as an author might think this is
601 # fine, while it is really not.
601 # fine, while it is really not.
602 if repo.vfs in vfsmap.values():
602 if repo.vfs in vfsmap.values():
603 raise error.ProgrammingError(
603 raise error.ProgrammingError(
604 b'repo.vfs must not be added to vfsmap for security reasons'
604 b'repo.vfs must not be added to vfsmap for security reasons'
605 )
605 )
606
606
607 progress = repo.ui.makeprogress(
607 progress = repo.ui.makeprogress(
608 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
608 _(b'bundle'), total=totalfilesize, unit=_(b'bytes')
609 )
609 )
610 progress.update(0)
610 progress.update(0)
611 with maketempcopies() as copy, progress:
611 with maketempcopies() as copy, progress:
612 # copy is delayed until we are in the try
612 # copy is delayed until we are in the try
613 entries = [_filterfull(e, copy, vfsmap) for e in entries]
613 entries = [_filterfull(e, copy, vfsmap) for e in entries]
614 yield None # this release the lock on the repository
614 yield None # this release the lock on the repository
615 totalbytecount = 0
615 totalbytecount = 0
616
616
617 for src, name, ftype, data in entries:
617 for src, name, ftype, data in entries:
618 vfs = vfsmap[src]
618 vfs = vfsmap[src]
619 yield src
619 yield src
620 yield util.uvarintencode(len(name))
620 yield util.uvarintencode(len(name))
621 if ftype == _fileappend:
621 if ftype == _fileappend:
622 fp = vfs(name)
622 fp = vfs(name)
623 size = data
623 size = data
624 elif ftype == _filefull:
624 elif ftype == _filefull:
625 fp = open(data, b'rb')
625 fp = open(data, b'rb')
626 size = util.fstat(fp).st_size
626 size = util.fstat(fp).st_size
627 bytecount = 0
627 bytecount = 0
628 try:
628 try:
629 yield util.uvarintencode(size)
629 yield util.uvarintencode(size)
630 yield name
630 yield name
631 if size <= 65536:
631 if size <= 65536:
632 chunks = (fp.read(size),)
632 chunks = (fp.read(size),)
633 else:
633 else:
634 chunks = util.filechunkiter(fp, limit=size)
634 chunks = util.filechunkiter(fp, limit=size)
635 for chunk in chunks:
635 for chunk in chunks:
636 bytecount += len(chunk)
636 bytecount += len(chunk)
637 totalbytecount += len(chunk)
637 totalbytecount += len(chunk)
638 progress.update(totalbytecount)
638 progress.update(totalbytecount)
639 yield chunk
639 yield chunk
640 if bytecount != size:
640 if bytecount != size:
641 # Would most likely be caused by a race due to `hg strip` or
641 # Would most likely be caused by a race due to `hg strip` or
642 # a revlog split
642 # a revlog split
643 raise error.Abort(
643 raise error.Abort(
644 _(
644 _(
645 b'clone could only read %d bytes from %s, but '
645 b'clone could only read %d bytes from %s, but '
646 b'expected %d bytes'
646 b'expected %d bytes'
647 )
647 )
648 % (bytecount, name, size)
648 % (bytecount, name, size)
649 )
649 )
650 finally:
650 finally:
651 fp.close()
651 fp.close()
652
652
653
653
654 def _test_sync_point_walk_1(repo):
654 def _test_sync_point_walk_1(repo):
655 """a function for synchronisation during tests"""
655 """a function for synchronisation during tests"""
656
656
657
657
658 def _test_sync_point_walk_2(repo):
658 def _test_sync_point_walk_2(repo):
659 """a function for synchronisation during tests"""
659 """a function for synchronisation during tests"""
660
660
661
661
662 def _v2_walk(repo, includes, excludes, includeobsmarkers):
662 def _v2_walk(repo, includes, excludes, includeobsmarkers):
663 """emit a seris of files information useful to clone a repo
663 """emit a seris of files information useful to clone a repo
664
664
665 return (entries, totalfilesize)
665 return (entries, totalfilesize)
666
666
667 entries is a list of tuples (vfs-key, file-path, file-type, size)
667 entries is a list of tuples (vfs-key, file-path, file-type, size)
668
668
669 - `vfs-key`: a key to the right vfs to write the file (see _makemap)
669 - `vfs-key`: a key to the right vfs to write the file (see _makemap)
670 - `name`: file path of the file to copy (to be fed to the vfs)
670 - `name`: file path of the file to copy (to be fed to the vfs)
671 - `file-type`: does this file need to be copied with the source lock held?
671 - `file-type`: does this file need to be copied with the source lock held?
672 - `size`: the size of the file (or None)
672 - `size`: the size of the file (or None)
673 """
673 """
674 assert repo._currentlock(repo._lockref) is not None
674 assert repo._currentlock(repo._lockref) is not None
675 entries = []
675 files = []
676 totalfilesize = 0
676 totalfilesize = 0
677
677
678 matcher = None
678 matcher = None
679 if includes or excludes:
679 if includes or excludes:
680 matcher = narrowspec.match(repo.root, includes, excludes)
680 matcher = narrowspec.match(repo.root, includes, excludes)
681
681
682 phase = not repo.publishing()
682 phase = not repo.publishing()
683 for entry in _walkstreamfiles(repo, matcher, phase=phase):
683 entries = _walkstreamfiles(
684 repo, matcher, phase=phase, obsolescence=includeobsmarkers
685 )
686 for entry in entries:
684 for f in entry.files():
687 for f in entry.files():
685 file_size = f.file_size(repo.store.vfs)
688 file_size = f.file_size(repo.store.vfs)
686 if file_size:
689 if file_size:
687 ft = _fileappend
690 ft = _fileappend
688 if f.is_volatile:
691 if f.is_volatile:
689 ft = _filefull
692 ft = _filefull
690 entries.append((_srcstore, f.unencoded_path, ft, file_size))
693 files.append((_srcstore, f.unencoded_path, ft, file_size))
691 totalfilesize += file_size
694 totalfilesize += file_size
692 if includeobsmarkers and repo.svfs.exists(b'obsstore'):
693 totalfilesize += repo.svfs.lstat(b'obsstore').st_size
694 entries.append((_srcstore, b'obsstore', _filefull, None))
695 for name in cacheutil.cachetocopy(repo):
695 for name in cacheutil.cachetocopy(repo):
696 if repo.cachevfs.exists(name):
696 if repo.cachevfs.exists(name):
697 totalfilesize += repo.cachevfs.lstat(name).st_size
697 totalfilesize += repo.cachevfs.lstat(name).st_size
698 entries.append((_srccache, name, _filefull, None))
698 files.append((_srccache, name, _filefull, None))
699 return entries, totalfilesize
699 return files, totalfilesize
700
700
701
701
702 def generatev2(repo, includes, excludes, includeobsmarkers):
702 def generatev2(repo, includes, excludes, includeobsmarkers):
703 """Emit content for version 2 of a streaming clone.
703 """Emit content for version 2 of a streaming clone.
704
704
705 the data stream consists of the following entries:
705 the data stream consists of the following entries:
706 1) A char representing the file destination (eg: store or cache)
706 1) A char representing the file destination (eg: store or cache)
707 2) A varint containing the length of the filename
707 2) A varint containing the length of the filename
708 3) A varint containing the length of file data
708 3) A varint containing the length of file data
709 4) N bytes containing the filename (the internal, store-agnostic form)
709 4) N bytes containing the filename (the internal, store-agnostic form)
710 5) N bytes containing the file data
710 5) N bytes containing the file data
711
711
712 Returns a 3-tuple of (file count, file size, data iterator).
712 Returns a 3-tuple of (file count, file size, data iterator).
713 """
713 """
714
714
715 with repo.lock():
715 with repo.lock():
716
716
717 repo.ui.debug(b'scanning\n')
717 repo.ui.debug(b'scanning\n')
718
718
719 entries, totalfilesize = _v2_walk(
719 entries, totalfilesize = _v2_walk(
720 repo,
720 repo,
721 includes=includes,
721 includes=includes,
722 excludes=excludes,
722 excludes=excludes,
723 includeobsmarkers=includeobsmarkers,
723 includeobsmarkers=includeobsmarkers,
724 )
724 )
725
725
726 chunks = _emit2(repo, entries, totalfilesize)
726 chunks = _emit2(repo, entries, totalfilesize)
727 first = next(chunks)
727 first = next(chunks)
728 assert first is None
728 assert first is None
729 _test_sync_point_walk_1(repo)
729 _test_sync_point_walk_1(repo)
730 _test_sync_point_walk_2(repo)
730 _test_sync_point_walk_2(repo)
731
731
732 return len(entries), totalfilesize, chunks
732 return len(entries), totalfilesize, chunks
733
733
734
734
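# Illustrative sketch, not part of Mercurial: encoding a single entry of the
# version 2 stream laid out in the generatev2() docstring above (destination
# char, varint name length, varint data length, name, data). The name and
# payload in the example are placeholders.
def _sketch_v2_entry(src, name, data):
    """return one encoded stream v2 entry, mirroring what _emit2() yields"""
    return (
        src  # 1) destination key: _srcstore or _srccache
        + util.uvarintencode(len(name))  # 2) length of the filename
        + util.uvarintencode(len(data))  # 3) length of the file data
        + name  # 4) store-agnostic filename
        + data  # 5) raw file data
    )

# e.g. _sketch_v2_entry(_srcstore, b'data/foo.i', b'\0' * 12) is the byte
# sequence consumev2() below reads back for a single file.
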
735 @contextlib.contextmanager
735 @contextlib.contextmanager
736 def nested(*ctxs):
736 def nested(*ctxs):
737 this = ctxs[0]
737 this = ctxs[0]
738 rest = ctxs[1:]
738 rest = ctxs[1:]
739 with this:
739 with this:
740 if rest:
740 if rest:
741 with nested(*rest):
741 with nested(*rest):
742 yield
742 yield
743 else:
743 else:
744 yield
744 yield
745
745
746
746
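# Illustrative usage sketch, not part of Mercurial, for nested() above: it
# enters an arbitrary number of context managers recursively, much like
# contextlib.ExitStack; consumev2() below uses it to hold every vfs
# backgroundclosing context at once. The temporary files are placeholders.
def _sketch_nested_usage():
    """enter two placeholder context managers; both are exited afterwards"""
    import tempfile

    a = tempfile.TemporaryFile()
    b = tempfile.TemporaryFile()
    with nested(a, b):
        a.write(b'x')
        b.write(b'y')
    return a.closed and b.closed  # True: nested() exited both
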
747 def consumev2(repo, fp, filecount, filesize):
747 def consumev2(repo, fp, filecount, filesize):
748 """Apply the contents from a version 2 streaming clone.
748 """Apply the contents from a version 2 streaming clone.
749
749
750 Data is read from an object that only needs to provide a ``read(size)``
750 Data is read from an object that only needs to provide a ``read(size)``
751 method.
751 method.
752 """
752 """
753 with repo.lock():
753 with repo.lock():
754 repo.ui.status(
754 repo.ui.status(
755 _(b'%d files to transfer, %s of data\n')
755 _(b'%d files to transfer, %s of data\n')
756 % (filecount, util.bytecount(filesize))
756 % (filecount, util.bytecount(filesize))
757 )
757 )
758
758
759 start = util.timer()
759 start = util.timer()
760 progress = repo.ui.makeprogress(
760 progress = repo.ui.makeprogress(
761 _(b'clone'), total=filesize, unit=_(b'bytes')
761 _(b'clone'), total=filesize, unit=_(b'bytes')
762 )
762 )
763 progress.update(0)
763 progress.update(0)
764
764
765 vfsmap = _makemap(repo)
765 vfsmap = _makemap(repo)
766 # we keep repo.vfs out of the map on purpose; there are too many dangers
766 # we keep repo.vfs out of the map on purpose; there are too many dangers
767 # there (eg: .hg/hgrc),
767 # there (eg: .hg/hgrc),
768 #
768 #
769 # this assert is duplicated (from _makemap) as an author might think this
769 # this assert is duplicated (from _makemap) as an author might think this
770 # is fine, while it is really not.
770 # is fine, while it is really not.
771 if repo.vfs in vfsmap.values():
771 if repo.vfs in vfsmap.values():
772 raise error.ProgrammingError(
772 raise error.ProgrammingError(
773 b'repo.vfs must not be added to vfsmap for security reasons'
773 b'repo.vfs must not be added to vfsmap for security reasons'
774 )
774 )
775
775
776 with repo.transaction(b'clone'):
776 with repo.transaction(b'clone'):
777 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
777 ctxs = (vfs.backgroundclosing(repo.ui) for vfs in vfsmap.values())
778 with nested(*ctxs):
778 with nested(*ctxs):
779 for i in range(filecount):
779 for i in range(filecount):
780 src = util.readexactly(fp, 1)
780 src = util.readexactly(fp, 1)
781 vfs = vfsmap[src]
781 vfs = vfsmap[src]
782 namelen = util.uvarintdecodestream(fp)
782 namelen = util.uvarintdecodestream(fp)
783 datalen = util.uvarintdecodestream(fp)
783 datalen = util.uvarintdecodestream(fp)
784
784
785 name = util.readexactly(fp, namelen)
785 name = util.readexactly(fp, namelen)
786
786
787 if repo.ui.debugflag:
787 if repo.ui.debugflag:
788 repo.ui.debug(
788 repo.ui.debug(
789 b'adding [%s] %s (%s)\n'
789 b'adding [%s] %s (%s)\n'
790 % (src, name, util.bytecount(datalen))
790 % (src, name, util.bytecount(datalen))
791 )
791 )
792
792
793 with vfs(name, b'w') as ofp:
793 with vfs(name, b'w') as ofp:
794 for chunk in util.filechunkiter(fp, limit=datalen):
794 for chunk in util.filechunkiter(fp, limit=datalen):
795 progress.increment(step=len(chunk))
795 progress.increment(step=len(chunk))
796 ofp.write(chunk)
796 ofp.write(chunk)
797
797
798 # force @filecache properties to be reloaded from
798 # force @filecache properties to be reloaded from
799 # streamclone-ed file at next access
799 # streamclone-ed file at next access
800 repo.invalidate(clearfilecache=True)
800 repo.invalidate(clearfilecache=True)
801
801
802 elapsed = util.timer() - start
802 elapsed = util.timer() - start
803 if elapsed <= 0:
803 if elapsed <= 0:
804 elapsed = 0.001
804 elapsed = 0.001
805 repo.ui.status(
805 repo.ui.status(
806 _(b'transferred %s in %.1f seconds (%s/sec)\n')
806 _(b'transferred %s in %.1f seconds (%s/sec)\n')
807 % (
807 % (
808 util.bytecount(progress.pos),
808 util.bytecount(progress.pos),
809 elapsed,
809 elapsed,
810 util.bytecount(progress.pos / elapsed),
810 util.bytecount(progress.pos / elapsed),
811 )
811 )
812 )
812 )
813 progress.complete()
813 progress.complete()
814
814
815
815
816 def applybundlev2(repo, fp, filecount, filesize, requirements):
816 def applybundlev2(repo, fp, filecount, filesize, requirements):
817 from . import localrepo
817 from . import localrepo
818
818
819 missingreqs = [r for r in requirements if r not in repo.supported]
819 missingreqs = [r for r in requirements if r not in repo.supported]
820 if missingreqs:
820 if missingreqs:
821 raise error.Abort(
821 raise error.Abort(
822 _(b'unable to apply stream clone: unsupported format: %s')
822 _(b'unable to apply stream clone: unsupported format: %s')
823 % b', '.join(sorted(missingreqs))
823 % b', '.join(sorted(missingreqs))
824 )
824 )
825
825
826 consumev2(repo, fp, filecount, filesize)
826 consumev2(repo, fp, filecount, filesize)
827
827
828 repo.requirements = new_stream_clone_requirements(
828 repo.requirements = new_stream_clone_requirements(
829 repo.requirements,
829 repo.requirements,
830 requirements,
830 requirements,
831 )
831 )
832 repo.svfs.options = localrepo.resolvestorevfsoptions(
832 repo.svfs.options = localrepo.resolvestorevfsoptions(
833 repo.ui, repo.requirements, repo.features
833 repo.ui, repo.requirements, repo.features
834 )
834 )
835 scmutil.writereporequirements(repo)
835 scmutil.writereporequirements(repo)
836 nodemap.post_stream_cleanup(repo)
836 nodemap.post_stream_cleanup(repo)
837
837
838
838
839 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
839 def _copy_files(src_vfs_map, dst_vfs_map, entries, progress):
840 hardlink = [True]
840 hardlink = [True]
841
841
842 def copy_used():
842 def copy_used():
843 hardlink[0] = False
843 hardlink[0] = False
844 progress.topic = _(b'copying')
844 progress.topic = _(b'copying')
845
845
846 for k, path, size in entries:
846 for k, path, size in entries:
847 src_vfs = src_vfs_map[k]
847 src_vfs = src_vfs_map[k]
848 dst_vfs = dst_vfs_map[k]
848 dst_vfs = dst_vfs_map[k]
849 src_path = src_vfs.join(path)
849 src_path = src_vfs.join(path)
850 dst_path = dst_vfs.join(path)
850 dst_path = dst_vfs.join(path)
851 # We cannot use dirname and makedirs of dst_vfs here because the store
851 # We cannot use dirname and makedirs of dst_vfs here because the store
852 # encoding confuses them. See issue 6581 for details.
852 # encoding confuses them. See issue 6581 for details.
853 dirname = os.path.dirname(dst_path)
853 dirname = os.path.dirname(dst_path)
854 if not os.path.exists(dirname):
854 if not os.path.exists(dirname):
855 util.makedirs(dirname)
855 util.makedirs(dirname)
856 dst_vfs.register_file(path)
856 dst_vfs.register_file(path)
857 # XXX we could use the #nb_bytes argument.
857 # XXX we could use the #nb_bytes argument.
858 util.copyfile(
858 util.copyfile(
859 src_path,
859 src_path,
860 dst_path,
860 dst_path,
861 hardlink=hardlink[0],
861 hardlink=hardlink[0],
862 no_hardlink_cb=copy_used,
862 no_hardlink_cb=copy_used,
863 check_fs_hardlink=False,
863 check_fs_hardlink=False,
864 )
864 )
865 progress.increment()
865 progress.increment()
866 return hardlink[0]
866 return hardlink[0]
867
867
868
868
869 def local_copy(src_repo, dest_repo):
869 def local_copy(src_repo, dest_repo):
870 """copy all content from one local repository to another
870 """copy all content from one local repository to another
871
871
872 This is useful for local clone"""
872 This is useful for local clone"""
873 src_store_requirements = {
873 src_store_requirements = {
874 r
874 r
875 for r in src_repo.requirements
875 for r in src_repo.requirements
876 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
876 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
877 }
877 }
878 dest_store_requirements = {
878 dest_store_requirements = {
879 r
879 r
880 for r in dest_repo.requirements
880 for r in dest_repo.requirements
881 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
881 if r not in requirementsmod.WORKING_DIR_REQUIREMENTS
882 }
882 }
883 assert src_store_requirements == dest_store_requirements
883 assert src_store_requirements == dest_store_requirements
884
884
885 with dest_repo.lock():
885 with dest_repo.lock():
886 with src_repo.lock():
886 with src_repo.lock():
887
887
888 # bookmarks are not integrated into the streaming as they might use the
888 # bookmarks are not integrated into the streaming as they might use the
889 # `repo.vfs`, and there is too much sensitive data accessible
889 # `repo.vfs`, and there is too much sensitive data accessible
890 # through `repo.vfs` to expose it to streaming clone.
890 # through `repo.vfs` to expose it to streaming clone.
891 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
891 src_book_vfs = bookmarks.bookmarksvfs(src_repo)
892 srcbookmarks = src_book_vfs.join(b'bookmarks')
892 srcbookmarks = src_book_vfs.join(b'bookmarks')
893 bm_count = 0
893 bm_count = 0
894 if os.path.exists(srcbookmarks):
894 if os.path.exists(srcbookmarks):
895 bm_count = 1
895 bm_count = 1
896
896
897 entries, totalfilesize = _v2_walk(
897 entries, totalfilesize = _v2_walk(
898 src_repo,
898 src_repo,
899 includes=None,
899 includes=None,
900 excludes=None,
900 excludes=None,
901 includeobsmarkers=True,
901 includeobsmarkers=True,
902 )
902 )
903 src_vfs_map = _makemap(src_repo)
903 src_vfs_map = _makemap(src_repo)
904 dest_vfs_map = _makemap(dest_repo)
904 dest_vfs_map = _makemap(dest_repo)
905 progress = src_repo.ui.makeprogress(
905 progress = src_repo.ui.makeprogress(
906 topic=_(b'linking'),
906 topic=_(b'linking'),
907 total=len(entries) + bm_count,
907 total=len(entries) + bm_count,
908 unit=_(b'files'),
908 unit=_(b'files'),
909 )
909 )
910 # copy files
910 # copy files
911 #
911 #
912 # We could copy the full file while the source repository is locked
912 # We could copy the full file while the source repository is locked
913 # and the other one without the lock. However, in the linking case,
913 # and the other one without the lock. However, in the linking case,
914 # this would also require checks that nobody is appending any data
914 # this would also require checks that nobody is appending any data
915 # to the files while we do the clone, so this is not done yet. We
915 # to the files while we do the clone, so this is not done yet. We
916 # could do this blindly when copying files.
916 # could do this blindly when copying files.
917 files = ((k, path, size) for k, path, ftype, size in entries)
917 files = ((k, path, size) for k, path, ftype, size in entries)
918 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
918 hardlink = _copy_files(src_vfs_map, dest_vfs_map, files, progress)
919
919
920 # copy bookmarks over
920 # copy bookmarks over
921 if bm_count:
921 if bm_count:
922 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
922 dst_book_vfs = bookmarks.bookmarksvfs(dest_repo)
923 dstbookmarks = dst_book_vfs.join(b'bookmarks')
923 dstbookmarks = dst_book_vfs.join(b'bookmarks')
924 util.copyfile(srcbookmarks, dstbookmarks)
924 util.copyfile(srcbookmarks, dstbookmarks)
925 progress.complete()
925 progress.complete()
926 if hardlink:
926 if hardlink:
927 msg = b'linked %d files\n'
927 msg = b'linked %d files\n'
928 else:
928 else:
929 msg = b'copied %d files\n'
929 msg = b'copied %d files\n'
930 src_repo.ui.debug(msg % (len(entries) + bm_count))
930 src_repo.ui.debug(msg % (len(entries) + bm_count))
931
931
932 with dest_repo.transaction(b"localclone") as tr:
932 with dest_repo.transaction(b"localclone") as tr:
933 dest_repo.store.write(tr)
933 dest_repo.store.write(tr)
934
934
935 # clean up transaction files as they do not make sense
935 # clean up transaction files as they do not make sense
936 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)
936 transaction.cleanup_undo_files(dest_repo.ui.warn, dest_repo.vfs_map)