##// END OF EJS Templates
store: rename `unencoded_path` to `entry_path` for StoreEntry...
marmoute -
r51388:ed8cda1c default
parent child Browse files
Show More
@@ -1,442 +1,442 b''
1 # remotefilelogserver.py - server logic for a remotefilelog server
1 # remotefilelogserver.py - server logic for a remotefilelog server
2 #
2 #
3 # Copyright 2013 Facebook, Inc.
3 # Copyright 2013 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import os
8 import os
9 import stat
9 import stat
10 import time
10 import time
11 import zlib
11 import zlib
12
12
13 from mercurial.i18n import _
13 from mercurial.i18n import _
14 from mercurial.node import bin, hex
14 from mercurial.node import bin, hex
15 from mercurial.pycompat import open
15 from mercurial.pycompat import open
16 from mercurial import (
16 from mercurial import (
17 changegroup,
17 changegroup,
18 changelog,
18 changelog,
19 context,
19 context,
20 error,
20 error,
21 extensions,
21 extensions,
22 match,
22 match,
23 scmutil,
23 scmutil,
24 store,
24 store,
25 streamclone,
25 streamclone,
26 util,
26 util,
27 wireprotoserver,
27 wireprotoserver,
28 wireprototypes,
28 wireprototypes,
29 wireprotov1server,
29 wireprotov1server,
30 )
30 )
31 from . import (
31 from . import (
32 constants,
32 constants,
33 shallowutil,
33 shallowutil,
34 )
34 )
35
35
36 _sshv1server = wireprotoserver.sshv1protocolhandler
36 _sshv1server = wireprotoserver.sshv1protocolhandler
37
37
38
38
39 def setupserver(ui, repo):
39 def setupserver(ui, repo):
40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
40 """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
41 onetimesetup(ui)
41 onetimesetup(ui)
42
42
43 # don't send files to shallow clients during pulls
43 # don't send files to shallow clients during pulls
44 def generatefiles(
44 def generatefiles(
45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
45 orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
46 ):
46 ):
47 caps = self._bundlecaps or []
47 caps = self._bundlecaps or []
48 if constants.BUNDLE2_CAPABLITY in caps:
48 if constants.BUNDLE2_CAPABLITY in caps:
49 # only send files that don't match the specified patterns
49 # only send files that don't match the specified patterns
50 includepattern = None
50 includepattern = None
51 excludepattern = None
51 excludepattern = None
52 for cap in self._bundlecaps or []:
52 for cap in self._bundlecaps or []:
53 if cap.startswith(b"includepattern="):
53 if cap.startswith(b"includepattern="):
54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
54 includepattern = cap[len(b"includepattern=") :].split(b'\0')
55 elif cap.startswith(b"excludepattern="):
55 elif cap.startswith(b"excludepattern="):
56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
56 excludepattern = cap[len(b"excludepattern=") :].split(b'\0')
57
57
58 m = match.always()
58 m = match.always()
59 if includepattern or excludepattern:
59 if includepattern or excludepattern:
60 m = match.match(
60 m = match.match(
61 repo.root, b'', None, includepattern, excludepattern
61 repo.root, b'', None, includepattern, excludepattern
62 )
62 )
63
63
64 changedfiles = list([f for f in changedfiles if not m(f)])
64 changedfiles = list([f for f in changedfiles if not m(f)])
65 return orig(
65 return orig(
66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
66 self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
67 )
67 )
68
68
69 extensions.wrapfunction(
69 extensions.wrapfunction(
70 changegroup.cgpacker, b'generatefiles', generatefiles
70 changegroup.cgpacker, b'generatefiles', generatefiles
71 )
71 )
72
72
73
73
74 onetime = False
74 onetime = False
75
75
76
76
77 def onetimesetup(ui):
77 def onetimesetup(ui):
78 """Configures the wireprotocol for both clients and servers."""
78 """Configures the wireprotocol for both clients and servers."""
79 global onetime
79 global onetime
80 if onetime:
80 if onetime:
81 return
81 return
82 onetime = True
82 onetime = True
83
83
84 # support file content requests
84 # support file content requests
85 wireprotov1server.wireprotocommand(
85 wireprotov1server.wireprotocommand(
86 b'x_rfl_getflogheads', b'path', permission=b'pull'
86 b'x_rfl_getflogheads', b'path', permission=b'pull'
87 )(getflogheads)
87 )(getflogheads)
88 wireprotov1server.wireprotocommand(
88 wireprotov1server.wireprotocommand(
89 b'x_rfl_getfiles', b'', permission=b'pull'
89 b'x_rfl_getfiles', b'', permission=b'pull'
90 )(getfiles)
90 )(getfiles)
91 wireprotov1server.wireprotocommand(
91 wireprotov1server.wireprotocommand(
92 b'x_rfl_getfile', b'file node', permission=b'pull'
92 b'x_rfl_getfile', b'file node', permission=b'pull'
93 )(getfile)
93 )(getfile)
94
94
95 class streamstate:
95 class streamstate:
96 match = None
96 match = None
97 shallowremote = False
97 shallowremote = False
98 noflatmf = False
98 noflatmf = False
99
99
100 state = streamstate()
100 state = streamstate()
101
101
102 def stream_out_shallow(repo, proto, other):
102 def stream_out_shallow(repo, proto, other):
103 includepattern = None
103 includepattern = None
104 excludepattern = None
104 excludepattern = None
105 raw = other.get(b'includepattern')
105 raw = other.get(b'includepattern')
106 if raw:
106 if raw:
107 includepattern = raw.split(b'\0')
107 includepattern = raw.split(b'\0')
108 raw = other.get(b'excludepattern')
108 raw = other.get(b'excludepattern')
109 if raw:
109 if raw:
110 excludepattern = raw.split(b'\0')
110 excludepattern = raw.split(b'\0')
111
111
112 oldshallow = state.shallowremote
112 oldshallow = state.shallowremote
113 oldmatch = state.match
113 oldmatch = state.match
114 oldnoflatmf = state.noflatmf
114 oldnoflatmf = state.noflatmf
115 try:
115 try:
116 state.shallowremote = True
116 state.shallowremote = True
117 state.match = match.always()
117 state.match = match.always()
118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
118 state.noflatmf = other.get(b'noflatmanifest') == b'True'
119 if includepattern or excludepattern:
119 if includepattern or excludepattern:
120 state.match = match.match(
120 state.match = match.match(
121 repo.root, b'', None, includepattern, excludepattern
121 repo.root, b'', None, includepattern, excludepattern
122 )
122 )
123 streamres = wireprotov1server.stream(repo, proto)
123 streamres = wireprotov1server.stream(repo, proto)
124
124
125 # Force the first value to execute, so the file list is computed
125 # Force the first value to execute, so the file list is computed
126 # within the try/finally scope
126 # within the try/finally scope
127 first = next(streamres.gen)
127 first = next(streamres.gen)
128 second = next(streamres.gen)
128 second = next(streamres.gen)
129
129
130 def gen():
130 def gen():
131 yield first
131 yield first
132 yield second
132 yield second
133 for value in streamres.gen:
133 for value in streamres.gen:
134 yield value
134 yield value
135
135
136 return wireprototypes.streamres(gen())
136 return wireprototypes.streamres(gen())
137 finally:
137 finally:
138 state.shallowremote = oldshallow
138 state.shallowremote = oldshallow
139 state.match = oldmatch
139 state.match = oldmatch
140 state.noflatmf = oldnoflatmf
140 state.noflatmf = oldnoflatmf
141
141
142 wireprotov1server.commands[b'stream_out_shallow'] = (
142 wireprotov1server.commands[b'stream_out_shallow'] = (
143 stream_out_shallow,
143 stream_out_shallow,
144 b'*',
144 b'*',
145 )
145 )
146
146
147 # don't clone filelogs to shallow clients
147 # don't clone filelogs to shallow clients
148 def _walkstreamfiles(orig, repo, matcher=None):
148 def _walkstreamfiles(orig, repo, matcher=None):
149 if state.shallowremote:
149 if state.shallowremote:
150 # if we are shallow ourselves, stream our local commits
150 # if we are shallow ourselves, stream our local commits
151 if shallowutil.isenabled(repo):
151 if shallowutil.isenabled(repo):
152 striplen = len(repo.store.path) + 1
152 striplen = len(repo.store.path) + 1
153 readdir = repo.store.rawvfs.readdir
153 readdir = repo.store.rawvfs.readdir
154 visit = [os.path.join(repo.store.path, b'data')]
154 visit = [os.path.join(repo.store.path, b'data')]
155 while visit:
155 while visit:
156 p = visit.pop()
156 p = visit.pop()
157 for f, kind, st in readdir(p, stat=True):
157 for f, kind, st in readdir(p, stat=True):
158 fp = p + b'/' + f
158 fp = p + b'/' + f
159 if kind == stat.S_IFREG:
159 if kind == stat.S_IFREG:
160 if not fp.endswith(b'.i') and not fp.endswith(
160 if not fp.endswith(b'.i') and not fp.endswith(
161 b'.d'
161 b'.d'
162 ):
162 ):
163 n = util.pconvert(fp[striplen:])
163 n = util.pconvert(fp[striplen:])
164 d = store.decodedir(n)
164 d = store.decodedir(n)
165 yield store.SimpleStoreEntry(
165 yield store.SimpleStoreEntry(
166 unencoded_path=d,
166 entry_path=d,
167 is_volatile=False,
167 is_volatile=False,
168 file_size=st.st_size,
168 file_size=st.st_size,
169 )
169 )
170
170
171 if kind == stat.S_IFDIR:
171 if kind == stat.S_IFDIR:
172 visit.append(fp)
172 visit.append(fp)
173
173
174 if scmutil.istreemanifest(repo):
174 if scmutil.istreemanifest(repo):
175 for entry in repo.store.datafiles():
175 for entry in repo.store.datafiles():
176 if not entry.is_revlog:
176 if not entry.is_revlog:
177 continue
177 continue
178 if entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
178 if entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
179 yield entry
179 yield entry
180
180
181 # Return .d and .i files that do not match the shallow pattern
181 # Return .d and .i files that do not match the shallow pattern
182 match = state.match
182 match = state.match
183 if match and not match.always():
183 if match and not match.always():
184 for entry in repo.store.datafiles():
184 for entry in repo.store.datafiles():
185 if not entry.is_revlog:
185 if not entry.is_revlog:
186 continue
186 continue
187 if not state.match(entry.target_id):
187 if not state.match(entry.target_id):
188 yield entry
188 yield entry
189
189
190 for x in repo.store.topfiles():
190 for x in repo.store.topfiles():
191 if state.noflatmf and x[1][:11] == b'00manifest.':
191 if state.noflatmf and x[1][:11] == b'00manifest.':
192 continue
192 continue
193 yield x
193 yield x
194
194
195 elif shallowutil.isenabled(repo):
195 elif shallowutil.isenabled(repo):
196 # don't allow cloning from a shallow repo to a full repo
196 # don't allow cloning from a shallow repo to a full repo
197 # since it would require fetching every version of every
197 # since it would require fetching every version of every
198 # file in order to create the revlogs.
198 # file in order to create the revlogs.
199 raise error.Abort(
199 raise error.Abort(
200 _(b"Cannot clone from a shallow repo to a full repo.")
200 _(b"Cannot clone from a shallow repo to a full repo.")
201 )
201 )
202 else:
202 else:
203 for x in orig(repo, matcher):
203 for x in orig(repo, matcher):
204 yield x
204 yield x
205
205
206 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
206 extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)
207
207
208 # expose remotefilelog capabilities
208 # expose remotefilelog capabilities
209 def _capabilities(orig, repo, proto):
209 def _capabilities(orig, repo, proto):
210 caps = orig(repo, proto)
210 caps = orig(repo, proto)
211 if shallowutil.isenabled(repo) or ui.configbool(
211 if shallowutil.isenabled(repo) or ui.configbool(
212 b'remotefilelog', b'server'
212 b'remotefilelog', b'server'
213 ):
213 ):
214 if isinstance(proto, _sshv1server):
214 if isinstance(proto, _sshv1server):
215 # legacy getfiles method which only works over ssh
215 # legacy getfiles method which only works over ssh
216 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
216 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
217 caps.append(b'x_rfl_getflogheads')
217 caps.append(b'x_rfl_getflogheads')
218 caps.append(b'x_rfl_getfile')
218 caps.append(b'x_rfl_getfile')
219 return caps
219 return caps
220
220
221 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
221 extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)
222
222
223 def _adjustlinkrev(orig, self, *args, **kwargs):
223 def _adjustlinkrev(orig, self, *args, **kwargs):
224 # When generating file blobs, taking the real path is too slow on large
224 # When generating file blobs, taking the real path is too slow on large
225 # repos, so force it to just return the linkrev directly.
225 # repos, so force it to just return the linkrev directly.
226 repo = self._repo
226 repo = self._repo
227 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
227 if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
228 return self._filelog.linkrev(self._filelog.rev(self._filenode))
228 return self._filelog.linkrev(self._filelog.rev(self._filenode))
229 return orig(self, *args, **kwargs)
229 return orig(self, *args, **kwargs)
230
230
231 extensions.wrapfunction(
231 extensions.wrapfunction(
232 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
232 context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
233 )
233 )
234
234
235 def _iscmd(orig, cmd):
235 def _iscmd(orig, cmd):
236 if cmd == b'x_rfl_getfiles':
236 if cmd == b'x_rfl_getfiles':
237 return False
237 return False
238 return orig(cmd)
238 return orig(cmd)
239
239
240 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
240 extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
241
241
242
242
243 def _loadfileblob(repo, cachepath, path, node):
243 def _loadfileblob(repo, cachepath, path, node):
244 filecachepath = os.path.join(cachepath, path, hex(node))
244 filecachepath = os.path.join(cachepath, path, hex(node))
245 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
245 if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
246 filectx = repo.filectx(path, fileid=node)
246 filectx = repo.filectx(path, fileid=node)
247 if filectx.node() == repo.nullid:
247 if filectx.node() == repo.nullid:
248 repo.changelog = changelog.changelog(repo.svfs)
248 repo.changelog = changelog.changelog(repo.svfs)
249 filectx = repo.filectx(path, fileid=node)
249 filectx = repo.filectx(path, fileid=node)
250
250
251 text = createfileblob(filectx)
251 text = createfileblob(filectx)
252 # TODO configurable compression engines
252 # TODO configurable compression engines
253 text = zlib.compress(text)
253 text = zlib.compress(text)
254
254
255 # everything should be user & group read/writable
255 # everything should be user & group read/writable
256 oldumask = os.umask(0o002)
256 oldumask = os.umask(0o002)
257 try:
257 try:
258 dirname = os.path.dirname(filecachepath)
258 dirname = os.path.dirname(filecachepath)
259 if not os.path.exists(dirname):
259 if not os.path.exists(dirname):
260 try:
260 try:
261 os.makedirs(dirname)
261 os.makedirs(dirname)
262 except FileExistsError:
262 except FileExistsError:
263 pass
263 pass
264
264
265 f = None
265 f = None
266 try:
266 try:
267 f = util.atomictempfile(filecachepath, b"wb")
267 f = util.atomictempfile(filecachepath, b"wb")
268 f.write(text)
268 f.write(text)
269 except (IOError, OSError):
269 except (IOError, OSError):
270 # Don't abort if the user only has permission to read,
270 # Don't abort if the user only has permission to read,
271 # and not write.
271 # and not write.
272 pass
272 pass
273 finally:
273 finally:
274 if f:
274 if f:
275 f.close()
275 f.close()
276 finally:
276 finally:
277 os.umask(oldumask)
277 os.umask(oldumask)
278 else:
278 else:
279 with open(filecachepath, b"rb") as f:
279 with open(filecachepath, b"rb") as f:
280 text = f.read()
280 text = f.read()
281 return text
281 return text
282
282
283
283
284 def getflogheads(repo, proto, path):
284 def getflogheads(repo, proto, path):
285 """A server api for requesting a filelog's heads"""
285 """A server api for requesting a filelog's heads"""
286 flog = repo.file(path)
286 flog = repo.file(path)
287 heads = flog.heads()
287 heads = flog.heads()
288 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
288 return b'\n'.join((hex(head) for head in heads if head != repo.nullid))
289
289
290
290
291 def getfile(repo, proto, file, node):
291 def getfile(repo, proto, file, node):
292 """A server api for requesting a particular version of a file. Can be used
292 """A server api for requesting a particular version of a file. Can be used
293 in batches to request many files at once. The return protocol is:
293 in batches to request many files at once. The return protocol is:
294 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
294 <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
295 non-zero for an error.
295 non-zero for an error.
296
296
297 data is a compressed blob with revlog flag and ancestors information. See
297 data is a compressed blob with revlog flag and ancestors information. See
298 createfileblob for its content.
298 createfileblob for its content.
299 """
299 """
300 if shallowutil.isenabled(repo):
300 if shallowutil.isenabled(repo):
301 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
301 return b'1\0' + _(b'cannot fetch remote files from shallow repo')
302 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
302 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
303 if not cachepath:
303 if not cachepath:
304 cachepath = os.path.join(repo.path, b"remotefilelogcache")
304 cachepath = os.path.join(repo.path, b"remotefilelogcache")
305 node = bin(node.strip())
305 node = bin(node.strip())
306 if node == repo.nullid:
306 if node == repo.nullid:
307 return b'0\0'
307 return b'0\0'
308 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
308 return b'0\0' + _loadfileblob(repo, cachepath, file, node)
309
309
310
310
311 def getfiles(repo, proto):
311 def getfiles(repo, proto):
312 """A server api for requesting particular versions of particular files."""
312 """A server api for requesting particular versions of particular files."""
313 if shallowutil.isenabled(repo):
313 if shallowutil.isenabled(repo):
314 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
314 raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
315 if not isinstance(proto, _sshv1server):
315 if not isinstance(proto, _sshv1server):
316 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
316 raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))
317
317
318 def streamer():
318 def streamer():
319 fin = proto._fin
319 fin = proto._fin
320
320
321 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
321 cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
322 if not cachepath:
322 if not cachepath:
323 cachepath = os.path.join(repo.path, b"remotefilelogcache")
323 cachepath = os.path.join(repo.path, b"remotefilelogcache")
324
324
325 while True:
325 while True:
326 request = fin.readline()[:-1]
326 request = fin.readline()[:-1]
327 if not request:
327 if not request:
328 break
328 break
329
329
330 node = bin(request[:40])
330 node = bin(request[:40])
331 if node == repo.nullid:
331 if node == repo.nullid:
332 yield b'0\n'
332 yield b'0\n'
333 continue
333 continue
334
334
335 path = request[40:]
335 path = request[40:]
336
336
337 text = _loadfileblob(repo, cachepath, path, node)
337 text = _loadfileblob(repo, cachepath, path, node)
338
338
339 yield b'%d\n%s' % (len(text), text)
339 yield b'%d\n%s' % (len(text), text)
340
340
341 # it would be better to only flush after processing a whole batch
341 # it would be better to only flush after processing a whole batch
342 # but currently we don't know if there are more requests coming
342 # but currently we don't know if there are more requests coming
343 proto._fout.flush()
343 proto._fout.flush()
344
344
345 return wireprototypes.streamres(streamer())
345 return wireprototypes.streamres(streamer())
346
346
347
347
348 def createfileblob(filectx):
348 def createfileblob(filectx):
349 """
349 """
350 format:
350 format:
351 v0:
351 v0:
352 str(len(rawtext)) + '\0' + rawtext + ancestortext
352 str(len(rawtext)) + '\0' + rawtext + ancestortext
353 v1:
353 v1:
354 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
354 'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
355 metalist := metalist + '\n' + meta | meta
355 metalist := metalist + '\n' + meta | meta
356 meta := sizemeta | flagmeta
356 meta := sizemeta | flagmeta
357 sizemeta := METAKEYSIZE + str(len(rawtext))
357 sizemeta := METAKEYSIZE + str(len(rawtext))
358 flagmeta := METAKEYFLAG + str(flag)
358 flagmeta := METAKEYFLAG + str(flag)
359
359
360 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
360 note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
361 length of 1.
361 length of 1.
362 """
362 """
363 flog = filectx.filelog()
363 flog = filectx.filelog()
364 frev = filectx.filerev()
364 frev = filectx.filerev()
365 revlogflags = flog._revlog.flags(frev)
365 revlogflags = flog._revlog.flags(frev)
366 if revlogflags == 0:
366 if revlogflags == 0:
367 # normal files
367 # normal files
368 text = filectx.data()
368 text = filectx.data()
369 else:
369 else:
370 # lfs, read raw revision data
370 # lfs, read raw revision data
371 text = flog.rawdata(frev)
371 text = flog.rawdata(frev)
372
372
373 repo = filectx._repo
373 repo = filectx._repo
374
374
375 ancestors = [filectx]
375 ancestors = [filectx]
376
376
377 try:
377 try:
378 repo.forcelinkrev = True
378 repo.forcelinkrev = True
379 ancestors.extend([f for f in filectx.ancestors()])
379 ancestors.extend([f for f in filectx.ancestors()])
380
380
381 ancestortext = b""
381 ancestortext = b""
382 for ancestorctx in ancestors:
382 for ancestorctx in ancestors:
383 parents = ancestorctx.parents()
383 parents = ancestorctx.parents()
384 p1 = repo.nullid
384 p1 = repo.nullid
385 p2 = repo.nullid
385 p2 = repo.nullid
386 if len(parents) > 0:
386 if len(parents) > 0:
387 p1 = parents[0].filenode()
387 p1 = parents[0].filenode()
388 if len(parents) > 1:
388 if len(parents) > 1:
389 p2 = parents[1].filenode()
389 p2 = parents[1].filenode()
390
390
391 copyname = b""
391 copyname = b""
392 rename = ancestorctx.renamed()
392 rename = ancestorctx.renamed()
393 if rename:
393 if rename:
394 copyname = rename[0]
394 copyname = rename[0]
395 linknode = ancestorctx.node()
395 linknode = ancestorctx.node()
396 ancestortext += b"%s%s%s%s%s\0" % (
396 ancestortext += b"%s%s%s%s%s\0" % (
397 ancestorctx.filenode(),
397 ancestorctx.filenode(),
398 p1,
398 p1,
399 p2,
399 p2,
400 linknode,
400 linknode,
401 copyname,
401 copyname,
402 )
402 )
403 finally:
403 finally:
404 repo.forcelinkrev = False
404 repo.forcelinkrev = False
405
405
406 header = shallowutil.buildfileblobheader(len(text), revlogflags)
406 header = shallowutil.buildfileblobheader(len(text), revlogflags)
407
407
408 return b"%s\0%s%s" % (header, text, ancestortext)
408 return b"%s\0%s%s" % (header, text, ancestortext)
409
409
410
410
411 def gcserver(ui, repo):
411 def gcserver(ui, repo):
412 if not repo.ui.configbool(b"remotefilelog", b"server"):
412 if not repo.ui.configbool(b"remotefilelog", b"server"):
413 return
413 return
414
414
415 neededfiles = set()
415 neededfiles = set()
416 heads = repo.revs(b"heads(tip~25000:) - null")
416 heads = repo.revs(b"heads(tip~25000:) - null")
417
417
418 cachepath = repo.vfs.join(b"remotefilelogcache")
418 cachepath = repo.vfs.join(b"remotefilelogcache")
419 for head in heads:
419 for head in heads:
420 mf = repo[head].manifest()
420 mf = repo[head].manifest()
421 for filename, filenode in mf.items():
421 for filename, filenode in mf.items():
422 filecachepath = os.path.join(cachepath, filename, hex(filenode))
422 filecachepath = os.path.join(cachepath, filename, hex(filenode))
423 neededfiles.add(filecachepath)
423 neededfiles.add(filecachepath)
424
424
425 # delete unneeded older files
425 # delete unneeded older files
426 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
426 days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
427 expiration = time.time() - (days * 24 * 60 * 60)
427 expiration = time.time() - (days * 24 * 60 * 60)
428
428
429 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
429 progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
430 progress.update(0)
430 progress.update(0)
431 for root, dirs, files in os.walk(cachepath):
431 for root, dirs, files in os.walk(cachepath):
432 for file in files:
432 for file in files:
433 filepath = os.path.join(root, file)
433 filepath = os.path.join(root, file)
434 progress.increment()
434 progress.increment()
435 if filepath in neededfiles:
435 if filepath in neededfiles:
436 continue
436 continue
437
437
438 stat = os.stat(filepath)
438 stat = os.stat(filepath)
439 if stat.st_mtime < expiration:
439 if stat.st_mtime < expiration:
440 os.remove(filepath)
440 os.remove(filepath)
441
441
442 progress.complete()
442 progress.complete()
@@ -1,1056 +1,1056 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 import collections
8 import collections
9 import functools
9 import functools
10 import os
10 import os
11 import re
11 import re
12 import stat
12 import stat
13 from typing import Generator
13 from typing import Generator
14
14
15 from .i18n import _
15 from .i18n import _
16 from .pycompat import getattr
16 from .pycompat import getattr
17 from .thirdparty import attr
17 from .thirdparty import attr
18 from .node import hex
18 from .node import hex
19 from . import (
19 from . import (
20 changelog,
20 changelog,
21 error,
21 error,
22 manifest,
22 manifest,
23 policy,
23 policy,
24 pycompat,
24 pycompat,
25 util,
25 util,
26 vfs as vfsmod,
26 vfs as vfsmod,
27 )
27 )
28 from .utils import hashutil
28 from .utils import hashutil
29
29
30 parsers = policy.importmod('parsers')
30 parsers = policy.importmod('parsers')
31 # how much bytes should be read from fncache in one read
31 # how much bytes should be read from fncache in one read
32 # It is done to prevent loading large fncache files into memory
32 # It is done to prevent loading large fncache files into memory
33 fncache_chunksize = 10 ** 6
33 fncache_chunksize = 10 ** 6
34
34
35
35
36 def _match_tracked_entry(entry, matcher):
36 def _match_tracked_entry(entry, matcher):
37 """parses a fncache entry and returns whether the entry is tracking a path
37 """parses a fncache entry and returns whether the entry is tracking a path
38 matched by matcher or not.
38 matched by matcher or not.
39
39
40 If matcher is None, returns True"""
40 If matcher is None, returns True"""
41
41
42 if matcher is None:
42 if matcher is None:
43 return True
43 return True
44 if entry.revlog_type == FILEFLAGS_FILELOG:
44 if entry.revlog_type == FILEFLAGS_FILELOG:
45 return matcher(entry.target_id)
45 return matcher(entry.target_id)
46 elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
46 elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
47 return matcher.visitdir(entry.target_id.rstrip(b'/'))
48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
48 raise error.ProgrammingError(b"cannot process entry %r" % entry)
49
49
50
50
51 # This avoids a collision between a file named foo and a dir named
51 # This avoids a collision between a file named foo and a dir named
52 # foo.i or foo.d
52 # foo.i or foo.d
53 def _encodedir(path):
53 def _encodedir(path):
54 """
54 """
55 >>> _encodedir(b'data/foo.i')
55 >>> _encodedir(b'data/foo.i')
56 'data/foo.i'
56 'data/foo.i'
57 >>> _encodedir(b'data/foo.i/bla.i')
57 >>> _encodedir(b'data/foo.i/bla.i')
58 'data/foo.i.hg/bla.i'
58 'data/foo.i.hg/bla.i'
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
59 >>> _encodedir(b'data/foo.i.hg/bla.i')
60 'data/foo.i.hg.hg/bla.i'
60 'data/foo.i.hg.hg/bla.i'
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
61 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
62 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
63 """
63 """
64 return (
64 return (
65 path.replace(b".hg/", b".hg.hg/")
65 path.replace(b".hg/", b".hg.hg/")
66 .replace(b".i/", b".i.hg/")
66 .replace(b".i/", b".i.hg/")
67 .replace(b".d/", b".d.hg/")
67 .replace(b".d/", b".d.hg/")
68 )
68 )
69
69
70
70
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72
72
73
73
74 def decodedir(path):
74 def decodedir(path):
75 """
75 """
76 >>> decodedir(b'data/foo.i')
76 >>> decodedir(b'data/foo.i')
77 'data/foo.i'
77 'data/foo.i'
78 >>> decodedir(b'data/foo.i.hg/bla.i')
78 >>> decodedir(b'data/foo.i.hg/bla.i')
79 'data/foo.i/bla.i'
79 'data/foo.i/bla.i'
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
80 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
81 'data/foo.i.hg/bla.i'
81 'data/foo.i.hg/bla.i'
82 """
82 """
83 if b".hg/" not in path:
83 if b".hg/" not in path:
84 return path
84 return path
85 return (
85 return (
86 path.replace(b".d.hg/", b".d/")
86 path.replace(b".d.hg/", b".d/")
87 .replace(b".i.hg/", b".i/")
87 .replace(b".i.hg/", b".i/")
88 .replace(b".hg.hg/", b".hg/")
88 .replace(b".hg.hg/", b".hg/")
89 )
89 )
90
90
91
91
92 def _reserved():
92 def _reserved():
93 """characters that are problematic for filesystems
93 """characters that are problematic for filesystems
94
94
95 * ascii escapes (0..31)
95 * ascii escapes (0..31)
96 * ascii hi (126..255)
96 * ascii hi (126..255)
97 * windows specials
97 * windows specials
98
98
99 these characters will be escaped by encodefunctions
99 these characters will be escaped by encodefunctions
100 """
100 """
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
101 winreserved = [ord(x) for x in u'\\:*?"<>|']
102 for x in range(32):
102 for x in range(32):
103 yield x
103 yield x
104 for x in range(126, 256):
104 for x in range(126, 256):
105 yield x
105 yield x
106 for x in winreserved:
106 for x in winreserved:
107 yield x
107 yield x
108
108
109
109
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    # '_' is the escape character for uppercase letters (and itself)
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encode map: plain ascii maps to itself ...
    cmap = {x: x for x in asciistr}
    # ... reserved bytes become '~XX' hex escapes ...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ... and capitals plus '_' itself become '_' + lowercase
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decode map: exact inverse of cmap
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # greedily match 1- to 3-byte encoded sequences against dmap
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no prefix decodes: s is not valid encoder output
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
166
166
167
167
168 _encodefname, _decodefname = _buildencodefun()
168 _encodefname, _decodefname = _buildencodefun()
169
169
170
170
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/_h_e_l_l_o')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/__h__e__l__l__o'
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # protect directory names ending in .i/.d/.hg first, then escape
    # reserved characters and uppercase letters
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
177
177
178
178
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # undo the per-character escaping first, then the directory escaping
    char_decoded = _decodefname(s)
    return decodedir(char_decoded)
185
185
186
186
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    # translation table: plain ascii maps to itself by default
    byte = pycompat.bytechr
    table = {byte(code): byte(code) for code in range(127)}
    # filesystem-hostile bytes become '~XX' hex escapes
    for code in _reserved():
        table[byte(code)] = b"~%02x" % code
    # uppercase letters are simply lowercased (not reversible)
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[byte(code)] = byte(code).lower()

    def lowerencode(s):
        return b"".join(map(table.__getitem__, pycompat.iterbytestr(s)))

    return lowerencode
210
210
211
211
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213
213
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217
217
218
218
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    Note: *path* is a list of segments; it is modified in place and also
    returned for convenience.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # escape a leading period or space ('.foo' -> '~2efoo')
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # only the part before the first '.' can collide with a
            # Windows reserved basename
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
265
265
266
266
267 _maxstorepathlen = 120
267 _maxstorepathlen = 120
268 _dirprefixlen = 8
268 _dirprefixlen = 8
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270
270
271
271
def _hashencode(path, dotencode):
    """hash-based fallback encoding for paths too long to store verbatim

    Builds ``dh/<truncated dirs>/<filler><sha1 hex><ext>``; see the
    `_hybridencode` docstring for the full scheme. Not reversible.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    # shorten every directory level to _dirprefixlen chars, keeping only
    # as many leading levels as fit within _maxshortdirslen in total
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with as much of the original basename as still fits under
    # _maxstorepathlen
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
302
302
303
303
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    dir_encoded = encodedir(path)
    segments = _encodefname(dir_encoded).split(b'/')
    candidate = b'/'.join(_auxencode(segments, dotencode))
    if len(candidate) <= _maxstorepathlen:
        return candidate
    # too long for the default reversible encoding: fall back to the
    # non-reversible hashed form
    return _hashencode(dir_encoded, dotencode)
341
341
342
342
def _pathencode(path):
    """Pure-Python fallback for the C `pathencode`: hybrid-encode *path*
    with dotencode enabled, hashing when the result would be too long."""
    dir_encoded = encodedir(path)
    # a raw path already over the limit cannot survive the default
    # encoding (which never shrinks it), so hash straight away
    if len(path) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    segments = _encodefname(dir_encoded).split(b'/')
    encoded = b'/'.join(_auxencode(segments, True))
    if len(encoded) > _maxstorepathlen:
        return _hashencode(dir_encoded, True)
    return encoded
352
352
353
353
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355
355
356
356
def _plainhybridencode(f):
    """hybrid-encode *f* without escaping leading periods/spaces"""
    return _hybridencode(f, dotencode=False)
359
359
360
360
def _calcmode(vfs):
    """Return the creation mode for files in .hg/, or None.

    None means no explicit chmod is needed: either the store directory
    could not be stat'ed, or its permission bits already match what the
    current umask would produce anyway.
    """
    try:
        st_mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods: the umask already yields these bits
    if (st_mode & 0o777) == (~util.umask & 0o777):
        return None
    return st_mode
371
371
372
372
373 _data = [
373 _data = [
374 b'bookmarks',
374 b'bookmarks',
375 b'narrowspec',
375 b'narrowspec',
376 b'data',
376 b'data',
377 b'meta',
377 b'meta',
378 b'00manifest.d',
378 b'00manifest.d',
379 b'00manifest.i',
379 b'00manifest.i',
380 b'00changelog.d',
380 b'00changelog.d',
381 b'00changelog.i',
381 b'00changelog.i',
382 b'phaseroots',
382 b'phaseroots',
383 b'obsstore',
383 b'obsstore',
384 b'requires',
384 b'requires',
385 ]
385 ]
386
386
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 REVLOG_FILES_OTHER_EXT = (
388 REVLOG_FILES_OTHER_EXT = (
389 b'.idx',
389 b'.idx',
390 b'.d',
390 b'.d',
391 b'.dat',
391 b'.dat',
392 b'.n',
392 b'.n',
393 b'.nd',
393 b'.nd',
394 b'.sda',
394 b'.sda',
395 )
395 )
396 # file extension that also use a `-SOMELONGIDHASH.ext` form
396 # file extension that also use a `-SOMELONGIDHASH.ext` form
397 REVLOG_FILES_LONG_EXT = (
397 REVLOG_FILES_LONG_EXT = (
398 b'.nd',
398 b'.nd',
399 b'.idx',
399 b'.idx',
400 b'.dat',
400 b'.dat',
401 b'.sda',
401 b'.sda',
402 )
402 )
403 # files that are "volatile" and might change between listing and streaming
403 # files that are "volatile" and might change between listing and streaming
404 #
404 #
405 # note: the ".nd" file are nodemap data and won't "change" but they might be
405 # note: the ".nd" file are nodemap data and won't "change" but they might be
406 # deleted.
406 # deleted.
407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408
408
409 # some exception to the above matching
409 # some exception to the above matching
410 #
410 #
411 # XXX This is currently not in use because of issue6542
411 # XXX This is currently not in use because of issue6542
412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413
413
414
414
def is_revlog(f, kind, st):
    """Return the revlog type flags for regular file *f*, else None.

    *kind* is the stat file-type constant reported by readdir; anything
    that is not a regular file cannot be a revlog. *st* is unused here
    but kept so callers can pass full readdir results uniformly.
    """
    return revlog_type(f) if kind == stat.S_IFREG else None
419
419
420
420
def revlog_type(f):
    """Classify filename *f* as a revlog component, or return None.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if not f.endswith(REVLOG_FILES_OTHER_EXT):
        return None
    flags = FILETYPE_FILELOG_OTHER
    if f.endswith(REVLOG_FILES_VOLATILE_EXT):
        # may be deleted between listing and streaming
        flags |= FILEFLAGS_VOLATILE
    return flags
433
433
434
434
435 # the file is part of changelog data
435 # the file is part of changelog data
436 FILEFLAGS_CHANGELOG = 1 << 13
436 FILEFLAGS_CHANGELOG = 1 << 13
437 # the file is part of manifest data
437 # the file is part of manifest data
438 FILEFLAGS_MANIFESTLOG = 1 << 12
438 FILEFLAGS_MANIFESTLOG = 1 << 12
439 # the file is part of filelog data
439 # the file is part of filelog data
440 FILEFLAGS_FILELOG = 1 << 11
440 FILEFLAGS_FILELOG = 1 << 11
441 # file that are not directly part of a revlog
441 # file that are not directly part of a revlog
442 FILEFLAGS_OTHER = 1 << 10
442 FILEFLAGS_OTHER = 1 << 10
443
443
444 # the main entry point for a revlog
444 # the main entry point for a revlog
445 FILEFLAGS_REVLOG_MAIN = 1 << 1
445 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 # a secondary file for a revlog
446 # a secondary file for a revlog
447 FILEFLAGS_REVLOG_OTHER = 1 << 0
447 FILEFLAGS_REVLOG_OTHER = 1 << 0
448
448
449 # files that are "volatile" and might change between listing and streaming
449 # files that are "volatile" and might change between listing and streaming
450 FILEFLAGS_VOLATILE = 1 << 20
450 FILEFLAGS_VOLATILE = 1 << 20
451
451
452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 FILETYPE_OTHER = FILEFLAGS_OTHER
458 FILETYPE_OTHER = FILEFLAGS_OTHER
459
459
460
460
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # unencoded path of this entry inside the store
    _entry_path = attr.ib()
    # True if the backing file may change between listing and streaming
    _is_volatile = attr.ib(default=False)
    # size in bytes when recorded at walk time, None otherwise
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        # a basic entry is backed by exactly one on-disk file
        return [
            StoreFile(
                unencoded_path=self._entry_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
489
489
490
490
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # plain (non-revlog) store content, e.g. top-level files
    is_revlog = False
496
496
497
497
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* category constants (changelog/manifest/filelog)
    revlog_type = attr.ib(default=None)
    # identifier of the tracked item this revlog stores (b'' for the
    # top-level changelog/manifest revlogs)
    target_id = attr.ib(default=None)
    # True for the main entry point of the revlog, False for secondary files
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            entry_path=entry_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._entry_path
528
528
529
529
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # unencoded path of this file inside the store
    unencoded_path = attr.ib()
    # NOTE(review): the default is False, not None, while file_size() only
    # short-circuits on `is not None` -- a default-constructed instance
    # returns False (i.e. 0-ish) without stat'ing; confirm this is intended.
    _file_size = attr.ib(default=False)
    # True if the file may change between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        # prefer the size recorded at walk time; otherwise stat through
        # *vfs*, treating a vanished file as empty
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
545
545
546
546
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog
    prefix without extension, the second level is all the file "suffix"
    that were seen for this revlog and arbitrary file data as value.
    """
    grouped = {}
    for unencoded, data in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped.setdefault(prefix, {})[suffix] = data
    return sorted(grouped.items())
559
559
560
560
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension

    Long-hash forms (`name-SOMELONGIDHASH.ext`) split at the last '-';
    everything else splits at the last '.'.
    """
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    idx = filename.rfind(sep)
    return filename[:idx], filename[idx:]
569
569
570
570
571 def _ext_key(ext):
571 def _ext_key(ext):
572 """a key to order revlog suffix
572 """a key to order revlog suffix
573
573
574 important to issue .i after other entry."""
574 important to issue .i after other entry."""
575 # the only important part of this order is to keep the `.i` last.
575 # the only important part of this order is to keep the `.i` last.
576 if ext.endswith(b'.n'):
576 if ext.endswith(b'.n'):
577 return (0, ext)
577 return (0, ext)
578 elif ext.endswith(b'.nd'):
578 elif ext.endswith(b'.nd'):
579 return (10, ext)
579 return (10, ext)
580 elif ext.endswith(b'.d'):
580 elif ext.endswith(b'.d'):
581 return (20, ext)
581 return (20, ext)
582 elif ext.endswith(b'.i'):
582 elif ext.endswith(b'.i'):
583 return (50, ext)
583 return (50, ext)
584 else:
584 else:
585 return (40, ext)
585 return (40, ext)
586
586
587
587
588 class basicstore:
588 class basicstore:
589 '''base class for local repository stores'''
589 '''base class for local repository stores'''
590
590
    def __init__(self, path, vfstype):
        # *path* is the on-disk location of the store; *vfstype* is the
        # vfs class used to access it
        vfs = vfstype(path)
        self.path = vfs.base
        # files in .hg/ will be created with this mode (None = default)
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs gives unencoded access; vfs applies directory encoding
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs
599
599
600 def join(self, f):
600 def join(self, f):
601 return self.path + b'/' + encodedir(f)
601 return self.path + b'/' + encodedir(f)
602
602
    def _walk(self, relpath, recurse, undecodable=None):
        '''return a sorted list of (unencoded_name, (revlog_type, size))

        Walks `self.path/relpath` (recursively if *recurse* is set) and
        collects every regular file that `is_revlog` classifies as a
        revlog component. Note: despite looking iterator-like at call
        sites, this returns a fully materialized, sorted list, not a
        generator. *undecodable* is accepted for interface compatibility
        but unused in this base implementation.
        '''
        path = self.path
        if relpath:
            path += b'/' + relpath
        # length of the store-root prefix to strip from absolute paths
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            # iterative depth-first traversal of the store directory
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        # normalize separators, strip the store prefix,
                        # then undo the on-disk directory encoding
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l
626
626
    def changelog(self, trypending, concurrencychecker=None):
        """Instantiate the changelog backed by this store's vfs.

        *trypending* and *concurrencychecker* are forwarded unchanged to
        ``changelog.changelog``.
        """
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )
633
633
    def manifestlog(self, repo, storenarrowmatch):
        """Instantiate the manifest log for *repo* backed by this store."""
        # the root manifest revlog provides the storage for the log
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
637
637
    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        NOTE(review): *matcher* is accepted but never consulted in this
        base implementation.
        """
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            # drop anything _walk could not classify as a revlog file
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    # target_id is the part below 'data/' or 'meta/'
                    revlog_target_id = revlog.split(b'/', 1)[1]
                    yield RevlogStoreEntry(
                        entry_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )
666
666
    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """Yield store entries living at the root of the store.

        Plain files are yielded first, then the root manifest revlog,
        then the changelog revlog, keeping 00changelog.i last.
        """
        files = reversed(self._walk(b'', False))

        # group the 00changelog*/00manifest* files per revlog prefix so
        # all pieces of one revlog are emitted together
        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                # (keeping ordering so we get 00changelog.i last)
                key = lambda x: _ext_key(x[0])
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        entry_path=u,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )
707
707
def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
    """Yield every entry related to data storage (ie: revlogs).

    Data files come first, then the top-level store files.  When a
    *matcher* is supplied, only storage files for the matching tracked
    paths are produced by ``datafiles``.
    """
    yield from self.datafiles(matcher)
    yield from self.topfiles()
721
721
def copylist(self):
    """Return the store-relative paths to copy for a hardlink clone."""
    return _data
724
724
def write(self, tr):
    """Flush pending store state; the basic store keeps none."""
    pass
727
727
def invalidatecaches(self):
    """Drop cached state; the basic store caches nothing, so no-op."""
    pass
730
730
def markremoved(self, fn):
    """Hook called when *fn* is removed; the basic store ignores it."""
    pass
733
733
def __contains__(self, path):
    """Return True when the store has *path* as a file or a directory."""
    target = b"/".join((b"data", path))
    # exact file match: look for the revlog index
    if self.vfs.exists(target + b".i"):
        return True
    # otherwise probe for a directory of that name
    if not target.endswith(b"/"):
        target = target + b"/"
    return self.vfs.exists(target)
744
744
745
745
class encodedstore(basicstore):
    # Store variant whose filenames are stored encoded on disk
    # (see `encodefilename`/`decodefilename`).

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # `rawvfs` sees the on-disk (encoded) names directly, while
        # `vfs` transparently encodes the names passed through it.
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """Walk like the parent class, decoding on-disk filenames.

        A name that cannot be decoded either raises a StorageError
        (default) or, when an `undecodable` list is supplied, is
        appended to that list and skipped.
        """
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield data store entries, filtered by *matcher* when given."""
        entries = super(encodedstore, self).datafiles(undecodable=undecodable)
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        # on-disk location of `f`: the encoded name under the store path
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        # requires/00changelog.i are copied from the repo root; the rest
        # of the data lives under `store/`
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
785
785
786
786
class fncache:
    """In-memory view of the `fncache` file listing store filenames.

    The set of known names is loaded lazily from the `fncache` file and
    written back through a transaction.  New names accumulate in
    ``addls`` so a pure-addition flush can append instead of rewriting
    the whole file.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names for which `add` becomes a no-op
        self._ignores = set()
        # lazily loaded set of entries; None until `_load` has run
        self.entries = None
        # True when `entries` changed and needs a full rewrite on `write`
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in fixed-size chunks, splitting on complete lines only
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        # leftover bytes mean the file lacked a trailing newline
        if chunk:
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            # re-scan the file to report the offending line number(s)
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Flush pending changes to the `fncache` file within *tr*."""
        if self._dirty:
            # full rewrite: fold pending additions into `entries` first
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            # NOTE(review): addls is already known non-empty here; the
            # inner check is redundant but harmless
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access so appended names are re-read
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Make future `add(fn)` calls a no-op for this name."""
        self._ignores.add(fn)

    def add(self, fn):
        """Schedule *fn* for addition unless ignored or already known."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget *fn*; silently ignore names that were never recorded."""
        if self.entries is None:
            self._load()
        # a pending addition can be dropped without dirtying `entries`
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        # pending additions count as present
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        # iterate over both the loaded entries and pending additions
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
911
911
912
912
class _fncachevfs(vfsmod.proxyvfs):
    """Proxy vfs that records revlog file creations in the fncache.

    Paths are run through *encode* before reaching the underlying vfs;
    writes under ``data/`` or ``meta/`` register the (unencoded) path
    with the fncache.
    """

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only writes to revlog files under data/ or meta/ are tracked
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # empty paths must not be encoded (encoding expects a real name)
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """Generic hook point letting the fncache record store files."""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
948
948
949
949
class fncachestore(basicstore):
    """Store variant backed by an `fncache` file listing tracked names."""

    def __init__(self, path, vfstype, dotencode):
        # pick the path-encoding scheme based on the `dotencode` requirement
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # `rawvfs` uses on-disk (encoded) names; `vfs` encodes on the fly
        # and keeps the fncache up to date
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        # full filesystem path for the (unencoded) store name `f`
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield a RevlogStoreEntry per revlog piece listed in the fncache.

        Entries are grouped per revlog and filtered by *matcher* when one
        is supplied.
        """
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                entry = RevlogStoreEntry(
                    entry_path=f,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )
                if _match_tracked_entry(entry, matcher):
                    yield entry

    def copylist(self):
        """Return the paths to copy when cloning a fncache store."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        # the only pending state is the fncache itself
        self.fncache.write(tr)

    def invalidatecaches(self):
        # force a reload of the fncache on next access
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        """Return True if the encoded form of *f* exists on disk."""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now