store: use specialized class for store entries...
marmoute
r51366:5a2fb64d default
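This revision splits the catch-all `StoreEntry` attrs class into a small hierarchy: `BaseStoreEntry` keeps the shared fields (`unencoded_path`, `is_volatile`, `file_size`), `SimpleStoreEntry` pins `is_revlog = False` for plain store files, and `RevlogStoreEntry` pins `is_revlog = True` and carries the revlog-only fields (`revlog_type`, `is_revlog_main`). Callers such as remotefilelog's `_walkstreamfiles` then stop passing revlog flags for entries that are not revlogs. A minimal consumer sketch, not part of the commit and assuming only what the diff below shows (`repo` is a hypothetical repository object):

    # walk() yields SimpleStoreEntry / RevlogStoreEntry instances
    for entry in repo.store.walk():
        if entry.is_revlog:  # now a class attribute, not a per-instance flag
            print(entry.unencoded_path, entry.revlog_type, entry.is_revlog_main)
        else:
            print(entry.unencoded_path, entry.file_size)
        for f in entry.files():  # BaseStoreEntry.files() wraps it in a StoreFile
            assert f.unencoded_path == entry.unencoded_path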
@@ -1,446 +1,443
 # remotefilelogserver.py - server logic for a remotefilelog server
 #
 # Copyright 2013 Facebook, Inc.
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.

 import os
 import stat
 import time
 import zlib

 from mercurial.i18n import _
 from mercurial.node import bin, hex
 from mercurial.pycompat import open
 from mercurial import (
     changegroup,
     changelog,
     context,
     error,
     extensions,
     match,
     scmutil,
     store,
     streamclone,
     util,
     wireprotoserver,
     wireprototypes,
     wireprotov1server,
 )
 from . import (
     constants,
     shallowutil,
 )

 _sshv1server = wireprotoserver.sshv1protocolhandler


 def setupserver(ui, repo):
     """Sets up a normal Mercurial repo so it can serve files to shallow repos."""
     onetimesetup(ui)

     # don't send files to shallow clients during pulls
     def generatefiles(
         orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
     ):
         caps = self._bundlecaps or []
         if constants.BUNDLE2_CAPABLITY in caps:
             # only send files that don't match the specified patterns
             includepattern = None
             excludepattern = None
             for cap in self._bundlecaps or []:
                 if cap.startswith(b"includepattern="):
                     includepattern = cap[len(b"includepattern=") :].split(b'\0')
                 elif cap.startswith(b"excludepattern="):
                     excludepattern = cap[len(b"excludepattern=") :].split(b'\0')

             m = match.always()
             if includepattern or excludepattern:
                 m = match.match(
                     repo.root, b'', None, includepattern, excludepattern
                 )

             changedfiles = list([f for f in changedfiles if not m(f)])
         return orig(
             self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
         )

     extensions.wrapfunction(
         changegroup.cgpacker, b'generatefiles', generatefiles
     )


 onetime = False


 def onetimesetup(ui):
     """Configures the wireprotocol for both clients and servers."""
     global onetime
     if onetime:
         return
     onetime = True

     # support file content requests
     wireprotov1server.wireprotocommand(
         b'x_rfl_getflogheads', b'path', permission=b'pull'
     )(getflogheads)
     wireprotov1server.wireprotocommand(
         b'x_rfl_getfiles', b'', permission=b'pull'
     )(getfiles)
     wireprotov1server.wireprotocommand(
         b'x_rfl_getfile', b'file node', permission=b'pull'
     )(getfile)

     class streamstate:
         match = None
         shallowremote = False
         noflatmf = False

     state = streamstate()

     def stream_out_shallow(repo, proto, other):
         includepattern = None
         excludepattern = None
         raw = other.get(b'includepattern')
         if raw:
             includepattern = raw.split(b'\0')
         raw = other.get(b'excludepattern')
         if raw:
             excludepattern = raw.split(b'\0')

         oldshallow = state.shallowremote
         oldmatch = state.match
         oldnoflatmf = state.noflatmf
         try:
             state.shallowremote = True
             state.match = match.always()
             state.noflatmf = other.get(b'noflatmanifest') == b'True'
             if includepattern or excludepattern:
                 state.match = match.match(
                     repo.root, b'', None, includepattern, excludepattern
                 )
             streamres = wireprotov1server.stream(repo, proto)

             # Force the first value to execute, so the file list is computed
             # within the try/finally scope
             first = next(streamres.gen)
             second = next(streamres.gen)

             def gen():
                 yield first
                 yield second
                 for value in streamres.gen:
                     yield value

             return wireprototypes.streamres(gen())
         finally:
             state.shallowremote = oldshallow
             state.match = oldmatch
             state.noflatmf = oldnoflatmf

     wireprotov1server.commands[b'stream_out_shallow'] = (
         stream_out_shallow,
         b'*',
     )

     # don't clone filelogs to shallow clients
     def _walkstreamfiles(orig, repo, matcher=None):
         if state.shallowremote:
             # if we are shallow ourselves, stream our local commits
             if shallowutil.isenabled(repo):
                 striplen = len(repo.store.path) + 1
                 readdir = repo.store.rawvfs.readdir
                 visit = [os.path.join(repo.store.path, b'data')]
                 while visit:
                     p = visit.pop()
                     for f, kind, st in readdir(p, stat=True):
                         fp = p + b'/' + f
                         if kind == stat.S_IFREG:
                             if not fp.endswith(b'.i') and not fp.endswith(
                                 b'.d'
                             ):
                                 n = util.pconvert(fp[striplen:])
                                 d = store.decodedir(n)
-                                yield store.StoreEntry(
+                                yield store.SimpleStoreEntry(
                                     unencoded_path=d,
-                                    is_revlog=True,
-                                    revlog_type=None,
-                                    is_revlog_main=False,
                                     is_volatile=False,
                                     file_size=st.st_size,
                                 )

                     if kind == stat.S_IFDIR:
                         visit.append(fp)

             if scmutil.istreemanifest(repo):
                 for entry in repo.store.datafiles():
                     u = entry.unencoded_path
                     if u.startswith(b'meta/') and (
                         u.endswith(b'.i') or u.endswith(b'.d')
                     ):
                         yield entry

             # Return .d and .i files that do not match the shallow pattern
             match = state.match
             if match and not match.always():
                 for entry in repo.store.datafiles():
                     u = entry.unencoded_path
                     f = u[5:-2]  # trim data/... and .i/.d
                     if not state.match(f):
                         yield entry

             for x in repo.store.topfiles():
                 if state.noflatmf and x[1][:11] == b'00manifest.':
                     continue
                 yield x

         elif shallowutil.isenabled(repo):
             # don't allow cloning from a shallow repo to a full repo
             # since it would require fetching every version of every
             # file in order to create the revlogs.
             raise error.Abort(
                 _(b"Cannot clone from a shallow repo to a full repo.")
             )
         else:
             for x in orig(repo, matcher):
                 yield x

     extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)

     # expose remotefilelog capabilities
     def _capabilities(orig, repo, proto):
         caps = orig(repo, proto)
         if shallowutil.isenabled(repo) or ui.configbool(
             b'remotefilelog', b'server'
         ):
             if isinstance(proto, _sshv1server):
                 # legacy getfiles method which only works over ssh
                 caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
             caps.append(b'x_rfl_getflogheads')
             caps.append(b'x_rfl_getfile')
         return caps

     extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)

     def _adjustlinkrev(orig, self, *args, **kwargs):
         # When generating file blobs, taking the real path is too slow on large
         # repos, so force it to just return the linkrev directly.
         repo = self._repo
         if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
             return self._filelog.linkrev(self._filelog.rev(self._filenode))
         return orig(self, *args, **kwargs)

     extensions.wrapfunction(
         context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
     )

     def _iscmd(orig, cmd):
         if cmd == b'x_rfl_getfiles':
             return False
         return orig(cmd)

     extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)


 def _loadfileblob(repo, cachepath, path, node):
     filecachepath = os.path.join(cachepath, path, hex(node))
     if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
         filectx = repo.filectx(path, fileid=node)
         if filectx.node() == repo.nullid:
             repo.changelog = changelog.changelog(repo.svfs)
             filectx = repo.filectx(path, fileid=node)

         text = createfileblob(filectx)
         # TODO configurable compression engines
         text = zlib.compress(text)

         # everything should be user & group read/writable
         oldumask = os.umask(0o002)
         try:
             dirname = os.path.dirname(filecachepath)
             if not os.path.exists(dirname):
                 try:
                     os.makedirs(dirname)
                 except FileExistsError:
                     pass

             f = None
             try:
                 f = util.atomictempfile(filecachepath, b"wb")
                 f.write(text)
             except (IOError, OSError):
                 # Don't abort if the user only has permission to read,
                 # and not write.
                 pass
             finally:
                 if f:
                     f.close()
         finally:
             os.umask(oldumask)
     else:
         with open(filecachepath, b"rb") as f:
             text = f.read()
     return text


 def getflogheads(repo, proto, path):
     """A server api for requesting a filelog's heads"""
     flog = repo.file(path)
     heads = flog.heads()
     return b'\n'.join((hex(head) for head in heads if head != repo.nullid))


 def getfile(repo, proto, file, node):
     """A server api for requesting a particular version of a file. Can be used
     in batches to request many files at once. The return protocol is:
     <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
     non-zero for an error.

     data is a compressed blob with revlog flag and ancestors information. See
     createfileblob for its content.
     """
     if shallowutil.isenabled(repo):
         return b'1\0' + _(b'cannot fetch remote files from shallow repo')
     cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
     if not cachepath:
         cachepath = os.path.join(repo.path, b"remotefilelogcache")
     node = bin(node.strip())
     if node == repo.nullid:
         return b'0\0'
     return b'0\0' + _loadfileblob(repo, cachepath, file, node)


 def getfiles(repo, proto):
     """A server api for requesting particular versions of particular files."""
     if shallowutil.isenabled(repo):
         raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
     if not isinstance(proto, _sshv1server):
         raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))

     def streamer():
         fin = proto._fin

         cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
         if not cachepath:
             cachepath = os.path.join(repo.path, b"remotefilelogcache")

         while True:
             request = fin.readline()[:-1]
             if not request:
                 break

             node = bin(request[:40])
             if node == repo.nullid:
                 yield b'0\n'
                 continue

             path = request[40:]

             text = _loadfileblob(repo, cachepath, path, node)

             yield b'%d\n%s' % (len(text), text)

             # it would be better to only flush after processing a whole batch
             # but currently we don't know if there are more requests coming
             proto._fout.flush()

     return wireprototypes.streamres(streamer())


 def createfileblob(filectx):
     """
     format:
         v0:
             str(len(rawtext)) + '\0' + rawtext + ancestortext
         v1:
             'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
             metalist := metalist + '\n' + meta | meta
             meta := sizemeta | flagmeta
             sizemeta := METAKEYSIZE + str(len(rawtext))
             flagmeta := METAKEYFLAG + str(flag)

             note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
             length of 1.
     """
     flog = filectx.filelog()
     frev = filectx.filerev()
     revlogflags = flog._revlog.flags(frev)
     if revlogflags == 0:
         # normal files
         text = filectx.data()
     else:
         # lfs, read raw revision data
         text = flog.rawdata(frev)

     repo = filectx._repo

     ancestors = [filectx]

     try:
         repo.forcelinkrev = True
         ancestors.extend([f for f in filectx.ancestors()])

         ancestortext = b""
         for ancestorctx in ancestors:
             parents = ancestorctx.parents()
             p1 = repo.nullid
             p2 = repo.nullid
             if len(parents) > 0:
                 p1 = parents[0].filenode()
             if len(parents) > 1:
                 p2 = parents[1].filenode()

             copyname = b""
             rename = ancestorctx.renamed()
             if rename:
                 copyname = rename[0]
             linknode = ancestorctx.node()
             ancestortext += b"%s%s%s%s%s\0" % (
                 ancestorctx.filenode(),
                 p1,
                 p2,
                 linknode,
                 copyname,
             )
     finally:
         repo.forcelinkrev = False

     header = shallowutil.buildfileblobheader(len(text), revlogflags)

     return b"%s\0%s%s" % (header, text, ancestortext)


 def gcserver(ui, repo):
     if not repo.ui.configbool(b"remotefilelog", b"server"):
         return

     neededfiles = set()
     heads = repo.revs(b"heads(tip~25000:) - null")

     cachepath = repo.vfs.join(b"remotefilelogcache")
     for head in heads:
         mf = repo[head].manifest()
         for filename, filenode in mf.items():
             filecachepath = os.path.join(cachepath, filename, hex(filenode))
             neededfiles.add(filecachepath)

     # delete unneeded older files
     days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
     expiration = time.time() - (days * 24 * 60 * 60)

     progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
     progress.update(0)
     for root, dirs, files in os.walk(cachepath):
         for file in files:
             filepath = os.path.join(root, file)
             progress.increment()
             if filepath in neededfiles:
                 continue

             stat = os.stat(filepath)
             if stat.st_mtime < expiration:
                 os.remove(filepath)

     progress.complete()
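The fileblob wire format handled above is unchanged by this commit; `createfileblob`'s docstring gives its v0/v1 grammar. A hedged sketch of building a v1 header from that grammar alone; the one-byte key values b's' and b'f' for METAKEYSIZE and METAKEYFLAG are assumptions, not taken from this diff:

    # illustration only: a v1 fileblob header per createfileblob's grammar
    METAKEYSIZE = b's'  # assumed single-byte key
    METAKEYFLAG = b'f'  # assumed single-byte key

    def build_v1_header(rawtext, flag):
        metalist = [METAKEYSIZE + b'%d' % len(rawtext)]  # sizemeta must exist
        if flag:
            metalist.append(METAKEYFLAG + b'%d' % flag)  # flagmeta is optional
        return b'v1\n' + b'\n'.join(metalist)

    blob = build_v1_header(b'hello', 0) + b'\0' + b'hello'  # + ancestortext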
@@ -1,919 +1,937
 # store.py - repository store handling for Mercurial
 #
 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2 or any later version.


 import functools
 import os
 import re
 import stat
 from typing import Generator

 from .i18n import _
 from .pycompat import getattr
 from .thirdparty import attr
 from .node import hex
 from . import (
     changelog,
     error,
     manifest,
     policy,
     pycompat,
     util,
     vfs as vfsmod,
 )
 from .utils import hashutil

 parsers = policy.importmod('parsers')
 # how much bytes should be read from fncache in one read
 # It is done to prevent loading large fncache files into memory
 fncache_chunksize = 10 ** 6


 def _matchtrackedpath(path, matcher):
     """parses a fncache entry and returns whether the entry is tracking a path
     matched by matcher or not.

     If matcher is None, returns True"""

     if matcher is None:
         return True
     path = decodedir(path)
     if path.startswith(b'data/'):
         return matcher(path[len(b'data/') : -len(b'.i')])
     elif path.startswith(b'meta/'):
         return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])

     raise error.ProgrammingError(b"cannot decode path %s" % path)


 # This avoids a collision between a file named foo and a dir named
 # foo.i or foo.d
 def _encodedir(path):
     """
     >>> _encodedir(b'data/foo.i')
     'data/foo.i'
     >>> _encodedir(b'data/foo.i/bla.i')
     'data/foo.i.hg/bla.i'
     >>> _encodedir(b'data/foo.i.hg/bla.i')
     'data/foo.i.hg.hg/bla.i'
     >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
     'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
     """
     return (
         path.replace(b".hg/", b".hg.hg/")
         .replace(b".i/", b".i.hg/")
         .replace(b".d/", b".d.hg/")
     )


 encodedir = getattr(parsers, 'encodedir', _encodedir)


 def decodedir(path):
     """
     >>> decodedir(b'data/foo.i')
     'data/foo.i'
     >>> decodedir(b'data/foo.i.hg/bla.i')
     'data/foo.i/bla.i'
     >>> decodedir(b'data/foo.i.hg.hg/bla.i')
     'data/foo.i.hg/bla.i'
     """
     if b".hg/" not in path:
         return path
     return (
         path.replace(b".d.hg/", b".d/")
         .replace(b".i.hg/", b".i/")
         .replace(b".hg.hg/", b".hg/")
     )


 def _reserved():
     """characters that are problematic for filesystems

     * ascii escapes (0..31)
     * ascii hi (126..255)
     * windows specials

     these characters will be escaped by encodefunctions
     """
     winreserved = [ord(x) for x in u'\\:*?"<>|']
     for x in range(32):
         yield x
     for x in range(126, 256):
         yield x
     for x in winreserved:
         yield x


 def _buildencodefun():
     """
     >>> enc, dec = _buildencodefun()

     >>> enc(b'nothing/special.txt')
     'nothing/special.txt'
     >>> dec(b'nothing/special.txt')
     'nothing/special.txt'

     >>> enc(b'HELLO')
     '_h_e_l_l_o'
     >>> dec(b'_h_e_l_l_o')
     'HELLO'

     >>> enc(b'hello:world?')
     'hello~3aworld~3f'
     >>> dec(b'hello~3aworld~3f')
     'hello:world?'

     >>> enc(b'the\\x07quick\\xADshot')
     'the~07quick~adshot'
     >>> dec(b'the~07quick~adshot')
     'the\\x07quick\\xadshot'
     """
     e = b'_'
     xchr = pycompat.bytechr
     asciistr = list(map(xchr, range(127)))
     capitals = list(range(ord(b"A"), ord(b"Z") + 1))

     cmap = {x: x for x in asciistr}
     for x in _reserved():
         cmap[xchr(x)] = b"~%02x" % x
     for x in capitals + [ord(e)]:
         cmap[xchr(x)] = e + xchr(x).lower()

     dmap = {}
     for k, v in cmap.items():
         dmap[v] = k

     def decode(s):
         i = 0
         while i < len(s):
             for l in range(1, 4):
                 try:
                     yield dmap[s[i : i + l]]
                     i += l
                     break
                 except KeyError:
                     pass
             else:
                 raise KeyError

     return (
         lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
         lambda s: b''.join(list(decode(s))),
     )


 _encodefname, _decodefname = _buildencodefun()


 def encodefilename(s):
     """
     >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
     'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
     """
     return _encodefname(encodedir(s))


 def decodefilename(s):
     """
     >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
     'foo.i/bar.d/bla.hg/hi:world?/HELLO'
     """
     return decodedir(_decodefname(s))


 def _buildlowerencodefun():
     """
     >>> f = _buildlowerencodefun()
     >>> f(b'nothing/special.txt')
     'nothing/special.txt'
     >>> f(b'HELLO')
     'hello'
     >>> f(b'hello:world?')
     'hello~3aworld~3f'
     >>> f(b'the\\x07quick\\xADshot')
     'the~07quick~adshot'
     """
     xchr = pycompat.bytechr
     cmap = {xchr(x): xchr(x) for x in range(127)}
     for x in _reserved():
         cmap[xchr(x)] = b"~%02x" % x
     for x in range(ord(b"A"), ord(b"Z") + 1):
         cmap[xchr(x)] = xchr(x).lower()

     def lowerencode(s):
         return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

     return lowerencode


 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
 _winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
 _winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)


 def _auxencode(path, dotencode):
     """
     Encodes filenames containing names reserved by Windows or which end in
     period or space. Does not touch other single reserved characters c.
     Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
     Additionally encodes space or period at the beginning, if dotencode is
     True. Parameter path is assumed to be all lowercase.
     A segment only needs encoding if a reserved name appears as a
     basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
     doesn't need encoding.

     >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
     >>> _auxencode(s.split(b'/'), True)
     ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
     >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
     >>> _auxencode(s.split(b'/'), False)
     ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
     >>> _auxencode([b'foo. '], True)
     ['foo.~20']
     >>> _auxencode([b' .foo'], True)
     ['~20.foo']
     """
     for i, n in enumerate(path):
         if not n:
             continue
         if dotencode and n[0] in b'. ':
             n = b"~%02x" % ord(n[0:1]) + n[1:]
             path[i] = n
         else:
             l = n.find(b'.')
             if l == -1:
                 l = len(n)
             if (l == 3 and n[:3] in _winres3) or (
                 l == 4
                 and n[3:4] <= b'9'
                 and n[3:4] >= b'1'
                 and n[:3] in _winres4
             ):
                 # encode third letter ('aux' -> 'au~78')
                 ec = b"~%02x" % ord(n[2:3])
                 n = n[0:2] + ec + n[3:]
                 path[i] = n
         if n[-1] in b'. ':
             # encode last period or space ('foo...' -> 'foo..~2e')
             path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
     return path


 _maxstorepathlen = 120
 _dirprefixlen = 8
 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


 def _hashencode(path, dotencode):
     digest = hex(hashutil.sha1(path).digest())
     le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
     parts = _auxencode(le, dotencode)
     basename = parts[-1]
     _root, ext = os.path.splitext(basename)
     sdirs = []
     sdirslen = 0
     for p in parts[:-1]:
         d = p[:_dirprefixlen]
         if d[-1] in b'. ':
             # Windows can't access dirs ending in period or space
             d = d[:-1] + b'_'
         if sdirslen == 0:
             t = len(d)
         else:
             t = sdirslen + 1 + len(d)
             if t > _maxshortdirslen:
                 break
         sdirs.append(d)
         sdirslen = t
     dirs = b'/'.join(sdirs)
     if len(dirs) > 0:
         dirs += b'/'
     res = b'dh/' + dirs + digest + ext
     spaceleft = _maxstorepathlen - len(res)
     if spaceleft > 0:
         filler = basename[:spaceleft]
         res = b'dh/' + dirs + filler + digest + ext
     return res


 def _hybridencode(path, dotencode):
     """encodes path with a length limit

     Encodes all paths that begin with 'data/', according to the following.

     Default encoding (reversible):

     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
     characters are encoded as '~xx', where xx is the two digit hex code
     of the character (see encodefilename).
     Relevant path components consisting of Windows reserved filenames are
     masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

     Hashed encoding (not reversible):

     If the default-encoded path is longer than _maxstorepathlen, a
     non-reversible hybrid hashing of the path is done instead.
     This encoding uses up to _dirprefixlen characters of all directory
     levels of the lowerencoded path, but not more levels than can fit into
     _maxshortdirslen.
     Then follows the filler followed by the sha digest of the full path.
     The filler is the beginning of the basename of the lowerencoded path
     (the basename is everything after the last path separator). The filler
     is as long as possible, filling in characters from the basename until
     the encoded path has _maxstorepathlen characters (or all chars of the
     basename have been taken).
     The extension (e.g. '.i' or '.d') is preserved.

     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
     encoding was used.
     """
     path = encodedir(path)
     ef = _encodefname(path).split(b'/')
     res = b'/'.join(_auxencode(ef, dotencode))
     if len(res) > _maxstorepathlen:
         res = _hashencode(path, dotencode)
     return res


 def _pathencode(path):
     de = encodedir(path)
     if len(path) > _maxstorepathlen:
         return _hashencode(de, True)
     ef = _encodefname(de).split(b'/')
     res = b'/'.join(_auxencode(ef, True))
     if len(res) > _maxstorepathlen:
         return _hashencode(de, True)
     return res


 _pathencode = getattr(parsers, 'pathencode', _pathencode)


 def _plainhybridencode(f):
     return _hybridencode(f, False)


 def _calcmode(vfs):
     try:
         # files in .hg/ will be created using this mode
         mode = vfs.stat().st_mode
         # avoid some useless chmods
         if (0o777 & ~util.umask) == (0o777 & mode):
             mode = None
     except OSError:
         mode = None
     return mode


 _data = [
     b'bookmarks',
     b'narrowspec',
     b'data',
     b'meta',
     b'00manifest.d',
     b'00manifest.i',
     b'00changelog.d',
     b'00changelog.i',
     b'phaseroots',
     b'obsstore',
     b'requires',
 ]

 REVLOG_FILES_MAIN_EXT = (b'.i',)
 REVLOG_FILES_OTHER_EXT = (
     b'.idx',
     b'.d',
     b'.dat',
     b'.n',
     b'.nd',
     b'.sda',
 )
 # files that are "volatile" and might change between listing and streaming
 #
 # note: the ".nd" file are nodemap data and won't "change" but they might be
 # deleted.
 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

 # some exception to the above matching
 #
 # XXX This is currently not in use because of issue6542
 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')


 def is_revlog(f, kind, st):
     if kind != stat.S_IFREG:
         return None
     return revlog_type(f)


 def revlog_type(f):
     # XXX we need to filter `undo.` created by the transaction here, however
     # being naive about it also filter revlog for `undo.*` files, leading to
     # issue6542. So we no longer use EXCLUDED.
     if f.endswith(REVLOG_FILES_MAIN_EXT):
         return FILEFLAGS_REVLOG_MAIN
     elif f.endswith(REVLOG_FILES_OTHER_EXT):
         t = FILETYPE_FILELOG_OTHER
         if f.endswith(REVLOG_FILES_VOLATILE_EXT):
             t |= FILEFLAGS_VOLATILE
         return t
     return None


 # the file is part of changelog data
 FILEFLAGS_CHANGELOG = 1 << 13
 # the file is part of manifest data
 FILEFLAGS_MANIFESTLOG = 1 << 12
 # the file is part of filelog data
 FILEFLAGS_FILELOG = 1 << 11
 # file that are not directly part of a revlog
 FILEFLAGS_OTHER = 1 << 10

 # the main entry point for a revlog
 FILEFLAGS_REVLOG_MAIN = 1 << 1
 # a secondary file for a revlog
 FILEFLAGS_REVLOG_OTHER = 1 << 0

 # files that are "volatile" and might change between listing and streaming
 FILEFLAGS_VOLATILE = 1 << 20

 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
 FILETYPE_OTHER = FILEFLAGS_OTHER


 @attr.s(slots=True)
-class StoreEntry:
+class BaseStoreEntry:
     """An entry in the store

     This is returned by `store.walk` and represent some data in the store."""

     unencoded_path = attr.ib()
-    is_revlog = attr.ib(default=False)
-    revlog_type = attr.ib(default=None)
-    is_revlog_main = attr.ib(default=None)
     is_volatile = attr.ib(default=False)
     file_size = attr.ib(default=None)

     def files(self):
         return [
             StoreFile(
                 unencoded_path=self.unencoded_path,
                 file_size=self.file_size,
                 is_volatile=self.is_volatile,
             )
         ]


 @attr.s(slots=True)
+class SimpleStoreEntry(BaseStoreEntry):
+    """A generic entry in the store"""
+
+    is_revlog = False
+
+
+@attr.s(slots=True)
+class RevlogStoreEntry(BaseStoreEntry):
+    """A revlog entry in the store"""
+
+    is_revlog = True
+    revlog_type = attr.ib(default=None)
+    is_revlog_main = attr.ib(default=None)
+
+
+@attr.s(slots=True)
 class StoreFile:
481 """a file matching an entry"""
494 """a file matching an entry"""
482
495
483 unencoded_path = attr.ib()
496 unencoded_path = attr.ib()
484 file_size = attr.ib()
497 file_size = attr.ib()
485 is_volatile = attr.ib(default=False)
498 is_volatile = attr.ib(default=False)
486
499
487
500
488 class basicstore:
501 class basicstore:
489 '''base class for local repository stores'''
502 '''base class for local repository stores'''
490
503
491 def __init__(self, path, vfstype):
504 def __init__(self, path, vfstype):
492 vfs = vfstype(path)
505 vfs = vfstype(path)
493 self.path = vfs.base
506 self.path = vfs.base
494 self.createmode = _calcmode(vfs)
507 self.createmode = _calcmode(vfs)
495 vfs.createmode = self.createmode
508 vfs.createmode = self.createmode
496 self.rawvfs = vfs
509 self.rawvfs = vfs
497 self.vfs = vfsmod.filtervfs(vfs, encodedir)
510 self.vfs = vfsmod.filtervfs(vfs, encodedir)
498 self.opener = self.vfs
511 self.opener = self.vfs
499
512
500 def join(self, f):
513 def join(self, f):
501 return self.path + b'/' + encodedir(f)
514 return self.path + b'/' + encodedir(f)
502
515
503 def _walk(self, relpath, recurse):
516 def _walk(self, relpath, recurse):
504 '''yields (revlog_type, unencoded, size)'''
517 '''yields (revlog_type, unencoded, size)'''
505 path = self.path
518 path = self.path
506 if relpath:
519 if relpath:
507 path += b'/' + relpath
520 path += b'/' + relpath
508 striplen = len(self.path) + 1
521 striplen = len(self.path) + 1
509 l = []
522 l = []
510 if self.rawvfs.isdir(path):
523 if self.rawvfs.isdir(path):
511 visit = [path]
524 visit = [path]
512 readdir = self.rawvfs.readdir
525 readdir = self.rawvfs.readdir
513 while visit:
526 while visit:
514 p = visit.pop()
527 p = visit.pop()
515 for f, kind, st in readdir(p, stat=True):
528 for f, kind, st in readdir(p, stat=True):
516 fp = p + b'/' + f
529 fp = p + b'/' + f
517 rl_type = is_revlog(f, kind, st)
530 rl_type = is_revlog(f, kind, st)
518 if rl_type is not None:
531 if rl_type is not None:
519 n = util.pconvert(fp[striplen:])
532 n = util.pconvert(fp[striplen:])
520 l.append((rl_type, decodedir(n), st.st_size))
533 l.append((rl_type, decodedir(n), st.st_size))
521 elif kind == stat.S_IFDIR and recurse:
534 elif kind == stat.S_IFDIR and recurse:
522 visit.append(fp)
535 visit.append(fp)
523 l.sort()
536 l.sort()
524 return l
537 return l
525
538
526 def changelog(self, trypending, concurrencychecker=None):
539 def changelog(self, trypending, concurrencychecker=None):
527 return changelog.changelog(
540 return changelog.changelog(
528 self.vfs,
541 self.vfs,
529 trypending=trypending,
542 trypending=trypending,
530 concurrencychecker=concurrencychecker,
543 concurrencychecker=concurrencychecker,
531 )
544 )
532
545
533 def manifestlog(self, repo, storenarrowmatch):
546 def manifestlog(self, repo, storenarrowmatch):
534 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
547 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
535 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
548 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
536
549
     def datafiles(
         self, matcher=None, undecodable=None
-    ) -> Generator[StoreEntry, None, None]:
+    ) -> Generator[BaseStoreEntry, None, None]:
540 """Like walk, but excluding the changelog and root manifest.
553 """Like walk, but excluding the changelog and root manifest.
541
554
542 When [undecodable] is None, revlogs names that can't be
555 When [undecodable] is None, revlogs names that can't be
543 decoded cause an exception. When it is provided, it should
556 decoded cause an exception. When it is provided, it should
544 be a list and the filenames that can't be decoded are added
557 be a list and the filenames that can't be decoded are added
545 to it instead. This is very rarely needed."""
558 to it instead. This is very rarely needed."""
546 files = self._walk(b'data', True) + self._walk(b'meta', True)
559 files = self._walk(b'data', True) + self._walk(b'meta', True)
547 for (t, u, s) in files:
560 for (t, u, s) in files:
548 if t is not None:
561 if t is not None:
-                yield StoreEntry(
+                yield RevlogStoreEntry(
                     unencoded_path=u,
-                    is_revlog=True,
                     revlog_type=FILEFLAGS_FILELOG,
553 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
565 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
554 is_volatile=bool(t & FILEFLAGS_VOLATILE),
566 is_volatile=bool(t & FILEFLAGS_VOLATILE),
555 file_size=s,
567 file_size=s,
556 )
568 )
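
RevlogStoreEntry and the other entry classes this change introduces are defined earlier in store.py, outside this hunk. As a rough illustration only, a hierarchy of this shape could be modelled with dataclasses; the field names below are taken from the call sites in this diff, everything else is an assumption:

    from dataclasses import dataclass
    from typing import Optional

    @dataclass
    class BaseStoreEntry:
        """A file in the store (a guess at the real class's shape)."""
        unencoded_path: bytes
        is_volatile: bool = False
        file_size: Optional[int] = None

    @dataclass
    class SimpleStoreEntry(BaseStoreEntry):
        """A plain store file that is not a revlog."""

    @dataclass
    class RevlogStoreEntry(BaseStoreEntry):
        """A revlog file, carrying revlog-specific metadata."""
        revlog_type: Optional[int] = None
        is_revlog_main: bool = False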

-    def topfiles(self) -> Generator[StoreEntry, None, None]:
+    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        # yield manifest before changelog
        files = reversed(self._walk(b'', False))
        for (t, u, s) in files:
            if u.startswith(b'00changelog'):
-                revlog_type = FILEFLAGS_CHANGELOG
+                yield RevlogStoreEntry(
+                    unencoded_path=u,
+                    revlog_type=FILEFLAGS_CHANGELOG,
+                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                    file_size=s,
+                )
            elif u.startswith(b'00manifest'):
-                revlog_type = FILEFLAGS_MANIFESTLOG
+                yield RevlogStoreEntry(
+                    unencoded_path=u,
+                    revlog_type=FILEFLAGS_MANIFESTLOG,
+                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
+                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                    file_size=s,
+                )
            else:
-                revlog_type = None
-            yield StoreEntry(
-                unencoded_path=u,
-                is_revlog=revlog_type is not None,
-                revlog_type=revlog_type,
-                is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
-                is_volatile=bool(t & FILEFLAGS_VOLATILE),
-                file_size=s,
-            )
+                yield SimpleStoreEntry(
+                    unencoded_path=u,
+                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
+                    file_size=s,
+                )

-    def walk(self, matcher=None) -> Generator[StoreEntry, None, None]:
+    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields store entries describing each file

        if a matcher is passed, only storage files of tracked paths
        matching it are yielded
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

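Since walk() now yields entry objects rather than bare tuples, callers can dispatch on the entry class. A hypothetical consumer (store_summary is illustrative, not part of this change):

    # tally store size per entry class
    def store_summary(store):
        totals = {}
        for entry in store.walk():
            kind = type(entry).__name__  # e.g. b'RevlogStoreEntry'
            totals[kind] = totals.get(kind, 0) + (entry.file_size or 0)
        return totals
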
    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)


class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change, so we should probably add a test and
    # encoding/decoding for it too. See issue6548.

    def datafiles(
        self, matcher=None, undecodable=None
-    ) -> Generator[StoreEntry, None, None]:
+    ) -> Generator[BaseStoreEntry, None, None]:
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            entry.unencoded_path = f2
            yield entry

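Per the docstring on basicstore.datafiles, passing a list as undecodable collects bad names instead of raising. A hypothetical caller (safe_datafiles is illustrative, not part of this change):

    def safe_datafiles(store, warn):
        bad = []
        entries = list(store.datafiles(undecodable=bad))
        for name in bad:
            warn(b'ignoring undecodable revlog name: %s\n' % name)
        return entries
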
    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]


class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found; maybe the entry is bigger than
                # the chunk size, so keep iterating
                pass

        if chunk:
            msg = _(b"fncache does not end with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()
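
The iter(functools.partial(fp.read, n), b'') idiom reads until EOF in fixed-size chunks, deferring any partial trailing line to the next iteration. A self-contained sketch of the same newline-bounded chunking (the chunk size is arbitrary here):

    import functools
    import io

    def read_entries_chunked(fp, chunksize=4096):
        entries, chunk = set(), b''
        for c in iter(functools.partial(fp.read, chunksize), b''):
            chunk += c
            p = chunk.rfind(b'\n')  # only consume complete lines
            if p >= 0:
                entries.update(chunk[: p + 1].splitlines())
                chunk = chunk[p + 1 :]
        return entries, chunk  # non-empty chunk => no trailing newline

    entries, leftover = read_entries_chunked(io.BytesIO(b'a\nb\n'))
    assert entries == {b'a', b'b'} and leftover == b''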

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)


class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

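The interesting part of _fncachevfs is the interception: every write-mode open of a revlog path registers the file before delegating to the wrapped vfs. The wrapper pattern can be shown standalone; nothing below is Mercurial API, just a shape-alike:

    class RecordingOpener:
        """Wrap an opener and record every path opened for writing."""

        def __init__(self, opener, registry):
            self._opener = opener
            self._registry = registry  # a set standing in for fncache

        def __call__(self, path, mode='r', *args, **kw):
            if 'r' not in mode or '+' in mode:  # any write-ish mode
                self._registry.add(path)
            return self._opener(path, mode, *args, **kw)

    seen = set()
    opener = RecordingOpener(open, seen)
    # opener('newfile', 'w') would open the file and record 'newfile'
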
    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to let the fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)


class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
-    ) -> Generator[StoreEntry, None, None]:
+    ) -> Generator[BaseStoreEntry, None, None]:
        for f in sorted(self.fncache):
            if not _matchtrackedpath(f, matcher):
                continue
            ef = self.encode(f)
            t = revlog_type(f)
            if t is None:
                # Note: this should not be in the fncache then…
                #
                # However the fncache might contain such files, added by
                # previous versions of Mercurial.
                continue
-            t |= FILEFLAGS_FILELOG
            try:
-                yield StoreEntry(
+                yield RevlogStoreEntry(
                    unencoded_path=f,
-                    is_revlog=True,
                    revlog_type=FILEFLAGS_FILELOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=self.getsize(ef),
                )
            except FileNotFoundError:
                pass

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
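
Both store flavors implement the same membership protocol, so a caller can probe for either a tracked file or a directory prefix. Hypothetical results, assuming a store whose fncache holds b'data/src/app.py.i':

    b'src/app.py' in store  # True: exact lookup of data/src/app.py.i
    b'src' in store         # True: prefix match on data/src/
    b'docs' in store        # False: no entry under data/docs/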