##// END OF EJS Templates
store: rename `unencoded_path` to `entry_path` for StoreEntry...
marmoute -
r51388:ed8cda1c default
parent child Browse files
Show More
@@ -1,442 +1,442 b''
1 1 # remotefilelogserver.py - server logic for a remotefilelog server
2 2 #
3 3 # Copyright 2013 Facebook, Inc.
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import os
9 9 import stat
10 10 import time
11 11 import zlib
12 12
13 13 from mercurial.i18n import _
14 14 from mercurial.node import bin, hex
15 15 from mercurial.pycompat import open
16 16 from mercurial import (
17 17 changegroup,
18 18 changelog,
19 19 context,
20 20 error,
21 21 extensions,
22 22 match,
23 23 scmutil,
24 24 store,
25 25 streamclone,
26 26 util,
27 27 wireprotoserver,
28 28 wireprototypes,
29 29 wireprotov1server,
30 30 )
31 31 from . import (
32 32 constants,
33 33 shallowutil,
34 34 )
35 35
36 36 _sshv1server = wireprotoserver.sshv1protocolhandler
37 37
38 38
def setupserver(ui, repo):
    """Sets up a normal Mercurial repo so it can serve files to shallow repos.

    Runs the one-time wire protocol setup, then wraps
    ``changegroup.cgpacker.generatefiles`` so that files matched by a
    client's include/exclude patterns are left out of the changegroup.
    """
    onetimesetup(ui)

    # don't send files to shallow clients during pulls
    def generatefiles(
        orig, self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
    ):
        caps = self._bundlecaps or []
        if constants.BUNDLE2_CAPABLITY in caps:
            # only send files that don't match the specified patterns
            includepattern = None
            excludepattern = None
            for cap in caps:
                if cap.startswith(b"includepattern="):
                    includepattern = cap[len(b"includepattern=") :].split(b'\0')
                elif cap.startswith(b"excludepattern="):
                    excludepattern = cap[len(b"excludepattern=") :].split(b'\0')

            m = match.always()
            if includepattern or excludepattern:
                m = match.match(
                    repo.root, b'', None, includepattern, excludepattern
                )

            # files matched by the shallow pattern are fetched on demand by
            # the client, so they are dropped from the changegroup here
            changedfiles = [f for f in changedfiles if not m(f)]
        return orig(
            self, changedfiles, linknodes, commonrevs, source, *args, **kwargs
        )

    extensions.wrapfunction(
        changegroup.cgpacker, b'generatefiles', generatefiles
    )
72 72
73 73
74 74 onetime = False
75 75
76 76
def onetimesetup(ui):
    """Configures the wireprotocol for both clients and servers.

    Guarded by the module-level ``onetime`` flag, so only the first call has
    any effect. It registers the ``x_rfl_*`` wire commands and the
    ``stream_out_shallow`` command, and wraps the stream-clone file walker,
    the capabilities listing, ``basefilectx._adjustlinkrev`` and
    ``wireprotoserver.iscmd``.
    """
    global onetime
    if onetime:
        return
    onetime = True

    # support file content requests
    wireprotov1server.wireprotocommand(
        b'x_rfl_getflogheads', b'path', permission=b'pull'
    )(getflogheads)
    wireprotov1server.wireprotocommand(
        b'x_rfl_getfiles', b'', permission=b'pull'
    )(getfiles)
    wireprotov1server.wireprotocommand(
        b'x_rfl_getfile', b'file node', permission=b'pull'
    )(getfile)

    # state shared between stream_out_shallow and _walkstreamfiles below
    class streamstate:
        match = None
        shallowremote = False
        noflatmf = False

    state = streamstate()

    def stream_out_shallow(repo, proto, other):
        includepattern = None
        excludepattern = None
        raw = other.get(b'includepattern')
        if raw:
            includepattern = raw.split(b'\0')
        raw = other.get(b'excludepattern')
        if raw:
            excludepattern = raw.split(b'\0')

        oldshallow = state.shallowremote
        oldmatch = state.match
        oldnoflatmf = state.noflatmf
        try:
            state.shallowremote = True
            state.match = match.always()
            state.noflatmf = other.get(b'noflatmanifest') == b'True'
            if includepattern or excludepattern:
                state.match = match.match(
                    repo.root, b'', None, includepattern, excludepattern
                )
            streamres = wireprotov1server.stream(repo, proto)

            # Force the first value to execute, so the file list is computed
            # within the try/finally scope
            first = next(streamres.gen)
            second = next(streamres.gen)

            def gen():
                yield first
                yield second
                for value in streamres.gen:
                    yield value

            return wireprototypes.streamres(gen())
        finally:
            state.shallowremote = oldshallow
            state.match = oldmatch
            state.noflatmf = oldnoflatmf

    wireprotov1server.commands[b'stream_out_shallow'] = (
        stream_out_shallow,
        b'*',
    )

    # don't clone filelogs to shallow clients
    def _walkstreamfiles(orig, repo, matcher=None):
        if state.shallowremote:
            # if we are shallow ourselves, stream our local commits
            if shallowutil.isenabled(repo):
                striplen = len(repo.store.path) + 1
                readdir = repo.store.rawvfs.readdir
                visit = [os.path.join(repo.store.path, b'data')]
                while visit:
                    p = visit.pop()
                    for f, kind, st in readdir(p, stat=True):
                        fp = p + b'/' + f
                        if kind == stat.S_IFREG:
                            if not fp.endswith(b'.i') and not fp.endswith(
                                b'.d'
                            ):
                                n = util.pconvert(fp[striplen:])
                                d = store.decodedir(n)
                                yield store.SimpleStoreEntry(
                                    entry_path=d,
                                    is_volatile=False,
                                    file_size=st.st_size,
                                )

                        if kind == stat.S_IFDIR:
                            visit.append(fp)

            if scmutil.istreemanifest(repo):
                for entry in repo.store.datafiles():
                    if not entry.is_revlog:
                        continue
                    if entry.revlog_type == store.FILEFLAGS_MANIFESTLOG:
                        yield entry

            # Return .d and .i files that do not match the shallow pattern
            shallowmatch = state.match
            if shallowmatch and not shallowmatch.always():
                for entry in repo.store.datafiles():
                    if not entry.is_revlog:
                        continue
                    if not shallowmatch(entry.target_id):
                        yield entry

            for x in repo.store.topfiles():
                # NOTE(review): x[1] assumes topfiles() yields tuples; the
                # datafiles() loops above treat entries as objects - confirm
                if state.noflatmf and x[1][:11] == b'00manifest.':
                    continue
                yield x

        elif shallowutil.isenabled(repo):
            # don't allow cloning from a shallow repo to a full repo
            # since it would require fetching every version of every
            # file in order to create the revlogs.
            raise error.Abort(
                _(b"Cannot clone from a shallow repo to a full repo.")
            )
        else:
            for x in orig(repo, matcher):
                yield x

    extensions.wrapfunction(streamclone, b'_walkstreamfiles', _walkstreamfiles)

    # expose remotefilelog capabilities
    def _capabilities(orig, repo, proto):
        caps = orig(repo, proto)
        if shallowutil.isenabled(repo) or ui.configbool(
            b'remotefilelog', b'server'
        ):
            if isinstance(proto, _sshv1server):
                # legacy getfiles method which only works over ssh
                caps.append(constants.NETWORK_CAP_LEGACY_SSH_GETFILES)
            caps.append(b'x_rfl_getflogheads')
            caps.append(b'x_rfl_getfile')
        return caps

    extensions.wrapfunction(wireprotov1server, b'_capabilities', _capabilities)

    def _adjustlinkrev(orig, self, *args, **kwargs):
        # When generating file blobs, taking the real path is too slow on large
        # repos, so force it to just return the linkrev directly.
        repo = self._repo
        if util.safehasattr(repo, b'forcelinkrev') and repo.forcelinkrev:
            return self._filelog.linkrev(self._filelog.rev(self._filenode))
        return orig(self, *args, **kwargs)

    extensions.wrapfunction(
        context.basefilectx, b'_adjustlinkrev', _adjustlinkrev
    )

    def _iscmd(orig, cmd):
        if cmd == b'x_rfl_getfiles':
            return False
        return orig(cmd)

    extensions.wrapfunction(wireprotoserver, b'iscmd', _iscmd)
241 241
242 242
def _loadfileblob(repo, cachepath, path, node):
    """Return the zlib-compressed file blob for ``path`` at ``node``.

    The blob is cached at ``<cachepath>/<path>/<hex(node)>``. When that file
    is missing or empty the blob is built with ``createfileblob``, compressed
    and written to the cache on a best-effort basis; otherwise the cached
    bytes are read back and returned.
    """
    filecachepath = os.path.join(cachepath, path, hex(node))
    if not os.path.exists(filecachepath) or os.path.getsize(filecachepath) == 0:
        filectx = repo.filectx(path, fileid=node)
        if filectx.node() == repo.nullid:
            # reload the changelog and look the file context up again
            repo.changelog = changelog.changelog(repo.svfs)
            filectx = repo.filectx(path, fileid=node)

        text = createfileblob(filectx)
        # TODO configurable compression engines
        text = zlib.compress(text)

        # everything should be user & group read/writable
        oldumask = os.umask(0o002)
        try:
            dirname = os.path.dirname(filecachepath)
            if not os.path.exists(dirname):
                try:
                    os.makedirs(dirname)
                except FileExistsError:
                    # the directory appeared between the check and the call
                    pass

            f = None
            try:
                f = util.atomictempfile(filecachepath, b"wb")
                f.write(text)
            except (IOError, OSError):
                # Don't abort if the user only has permission to read,
                # and not write.
                pass
            finally:
                if f:
                    f.close()
        finally:
            os.umask(oldumask)
    else:
        with open(filecachepath, b"rb") as f:
            text = f.read()
    return text
282 282
283 283
def getflogheads(repo, proto, path):
    """A server api for requesting a filelog's heads

    Returns the hex form of every non-null head of the filelog for ``path``,
    joined with newlines.
    """
    flog = repo.file(path)
    heads = flog.heads()
    return b'\n'.join(hex(head) for head in heads if head != repo.nullid)
289 289
290 290
def getfile(repo, proto, file, node):
    """A server api for requesting a particular version of a file. Can be used
    in batches to request many files at once. The return protocol is:
    <errorcode>\0<data/errormsg> where <errorcode> is 0 for success or
    non-zero for an error.

    data is a compressed blob with revlog flag and ancestors information. See
    createfileblob for its content.
    """
    if shallowutil.isenabled(repo):
        return b'1\0' + _(b'cannot fetch remote files from shallow repo')
    cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
    if not cachepath:
        cachepath = os.path.join(repo.path, b"remotefilelogcache")
    node = bin(node.strip())
    if node == repo.nullid:
        # the null node is answered with success and no data
        return b'0\0'
    return b'0\0' + _loadfileblob(repo, cachepath, file, node)
309 309
310 310
def getfiles(repo, proto):
    """A server api for requesting particular versions of particular files.

    Requests are read line by line from ``proto._fin``: the first 40 bytes
    of a line are the hex node, the rest is the path. An empty line ends the
    stream. Each answer is ``b'0\\n'`` for the null node, otherwise the blob
    length, a newline, and the blob itself.

    Raises ``error.Abort`` on a shallow repo or over a non-ssh protocol.
    """
    if shallowutil.isenabled(repo):
        raise error.Abort(_(b'cannot fetch remote files from shallow repo'))
    if not isinstance(proto, _sshv1server):
        raise error.Abort(_(b'cannot fetch remote files over non-ssh protocol'))

    def streamer():
        fin = proto._fin

        cachepath = repo.ui.config(b"remotefilelog", b"servercachepath")
        if not cachepath:
            cachepath = os.path.join(repo.path, b"remotefilelogcache")

        while True:
            # drop the trailing newline
            request = fin.readline()[:-1]
            if not request:
                break

            node = bin(request[:40])
            if node == repo.nullid:
                yield b'0\n'
                continue

            path = request[40:]

            text = _loadfileblob(repo, cachepath, path, node)

            yield b'%d\n%s' % (len(text), text)

            # it would be better to only flush after processing a whole batch
            # but currently we don't know if there are more requests coming
            proto._fout.flush()

    return wireprototypes.streamres(streamer())
346 346
347 347
def createfileblob(filectx):
    """
    format:
        v0:
            str(len(rawtext)) + '\0' + rawtext + ancestortext
        v1:
            'v1' + '\n' + metalist + '\0' + rawtext + ancestortext
            metalist := metalist + '\n' + meta | meta
            meta := sizemeta | flagmeta
            sizemeta := METAKEYSIZE + str(len(rawtext))
            flagmeta := METAKEYFLAG + str(flag)

            note: sizemeta must exist. METAKEYFLAG and METAKEYSIZE must have a
            length of 1.
    """
    flog = filectx.filelog()
    frev = filectx.filerev()
    revlogflags = flog._revlog.flags(frev)
    if revlogflags == 0:
        # normal files
        text = filectx.data()
    else:
        # lfs, read raw revision data
        text = flog.rawdata(frev)

    repo = filectx._repo

    ancestors = [filectx]

    try:
        # forcelinkrev is read by the _adjustlinkrev wrapper; it is reset in
        # the finally clause even if the ancestor walk fails
        repo.forcelinkrev = True
        ancestors.extend(filectx.ancestors())

        # one record per ancestor: filenode, p1, p2, linknode, copy source
        # and a NUL terminator; collected in a list and joined once
        ancestorparts = []
        for ancestorctx in ancestors:
            parents = ancestorctx.parents()
            p1 = repo.nullid
            p2 = repo.nullid
            if len(parents) > 0:
                p1 = parents[0].filenode()
            if len(parents) > 1:
                p2 = parents[1].filenode()

            copyname = b""
            rename = ancestorctx.renamed()
            if rename:
                copyname = rename[0]
            linknode = ancestorctx.node()
            ancestorparts.append(
                b"%s%s%s%s%s\0"
                % (
                    ancestorctx.filenode(),
                    p1,
                    p2,
                    linknode,
                    copyname,
                )
            )
        ancestortext = b"".join(ancestorparts)
    finally:
        repo.forcelinkrev = False

    header = shallowutil.buildfileblobheader(len(text), revlogflags)

    return b"%s\0%s%s" % (header, text, ancestortext)
409 409
410 410
def gcserver(ui, repo):
    """Remove stale blobs from the server-side remotefilelog cache.

    Does nothing unless ``remotefilelog.server`` is enabled. Blobs referenced
    by the manifests of ``heads(tip~25000:) - null`` are always kept. Any
    other cached file is deleted once its mtime is older than
    ``remotefilelog.serverexpiration`` days.
    """
    if not repo.ui.configbool(b"remotefilelog", b"server"):
        return

    neededfiles = set()
    heads = repo.revs(b"heads(tip~25000:) - null")

    cachepath = repo.vfs.join(b"remotefilelogcache")
    for head in heads:
        mf = repo[head].manifest()
        for filename, filenode in mf.items():
            filecachepath = os.path.join(cachepath, filename, hex(filenode))
            neededfiles.add(filecachepath)

    # delete unneeded older files
    days = repo.ui.configint(b"remotefilelog", b"serverexpiration")
    expiration = time.time() - (days * 24 * 60 * 60)

    progress = ui.makeprogress(_(b"removing old server cache"), unit=b"files")
    progress.update(0)
    for root, dirs, files in os.walk(cachepath):
        for file in files:
            filepath = os.path.join(root, file)
            progress.increment()
            if filepath in neededfiles:
                continue

            # named ``st`` so the ``stat`` module is not shadowed
            st = os.stat(filepath)
            if st.st_mtime < expiration:
                os.remove(filepath)

    progress.complete()
@@ -1,1056 +1,1056 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
def _match_tracked_entry(entry, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    if entry.revlog_type == FILEFLAGS_FILELOG:
        return matcher(entry.target_id)
    elif entry.revlog_type == FILEFLAGS_MANIFESTLOG:
        # manifest entries are matched as directories, without trailing '/'
        return matcher.visitdir(entry.target_id.rstrip(b'/'))
    raise error.ProgrammingError(b"cannot process entry %r" % entry)
49 49
50 50
51 51 # This avoids a collision between a file named foo and a dir named
52 52 # foo.i or foo.d
def _encodedir(path):
    """
    >>> _encodedir(b'data/foo.i')
    'data/foo.i'
    >>> _encodedir(b'data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    """
    # '.hg/' must be escaped first, otherwise the '.hg' suffixes added by the
    # next two replacements would be escaped a second time
    return (
        path.replace(b".hg/", b".hg.hg/")
        .replace(b".i/", b".i.hg/")
        .replace(b".d/", b".d.hg/")
    )
69 69
70 70
71 71 encodedir = getattr(parsers, 'encodedir', _encodedir)
72 72
73 73
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing to decode when no directory ends in '.hg'
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )
90 90
91 91
def _reserved():
    """characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions
    """
    winreserved = [ord(x) for x in '\\:*?"<>|']
    yield from range(32)
    yield from range(126, 256)
    yield from winreserved
108 108
109 109
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encode map: identity, then '~xx' for reserved bytes, then '_x' for
    # capitals and for the escape character itself
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decode map is the inverse of the encode map
    dmap = {v: k for k, v in cmap.items()}

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1 to 3 bytes long; try the shortest first
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(decode(s)),
    )
166 166
167 167
168 168 _encodefname, _decodefname = _buildencodefun()
169 169
170 170
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # directory suffixes are escaped first, then individual characters
    return _encodefname(encodedir(s))
177 177
178 178
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # reverse of encodefilename: characters first, then directory suffixes
    return decodedir(_decodefname(s))
185 185
186 186
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity for ASCII, '~xx' for reserved bytes, lowercase for capitals
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
210 210
211 211
212 212 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
213 213
214 214 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
215 215 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
216 216 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
217 217
218 218
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    Note that ``path`` (a list of segments) is modified in place and returned.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # l is the length of the basename (text before the first '.')
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
265 265
266 266
267 267 _maxstorepathlen = 120
268 268 _dirprefixlen = 8
269 269 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
270 270
271 271
def _hashencode(path, dotencode):
    """Return the hashed ('dh/') store path for ``path``.

    The result is ``dh/`` + shortened directory prefixes + as much of the
    basename as fits within ``_maxstorepathlen`` + the sha1 hex digest of
    ``path`` + the original extension.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            # +1 accounts for the '/' separator
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
302 302
303 303
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
341 341
342 342
def _pathencode(path):
    """Encode ``path`` for the store, with dot-encoding always enabled.

    Uses the reversible encoding when the result fits in
    ``_maxstorepathlen``, and the hashed encoding otherwise.
    """
    de = encodedir(path)
    # input already too long: skip the reversible encoding entirely
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
352 352
353 353
354 354 _pathencode = getattr(parsers, 'pathencode', _pathencode)
355 355
356 356
def _plainhybridencode(f):
    """Hybrid-encode ``f`` with dot-encoding disabled."""
    return _hybridencode(f, False)
359 359
360 360
def _calcmode(vfs):
    """Return the mode to create store files with, or None.

    None is returned when the mode of ``vfs`` already agrees with the
    process umask, or when ``vfs`` cannot be stat'ed.
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
371 371
372 372
373 373 _data = [
374 374 b'bookmarks',
375 375 b'narrowspec',
376 376 b'data',
377 377 b'meta',
378 378 b'00manifest.d',
379 379 b'00manifest.i',
380 380 b'00changelog.d',
381 381 b'00changelog.i',
382 382 b'phaseroots',
383 383 b'obsstore',
384 384 b'requires',
385 385 ]
386 386
387 387 REVLOG_FILES_MAIN_EXT = (b'.i',)
388 388 REVLOG_FILES_OTHER_EXT = (
389 389 b'.idx',
390 390 b'.d',
391 391 b'.dat',
392 392 b'.n',
393 393 b'.nd',
394 394 b'.sda',
395 395 )
396 396 # file extensions that also use a `-SOMELONGIDHASH.ext` form
397 397 REVLOG_FILES_LONG_EXT = (
398 398 b'.nd',
399 399 b'.idx',
400 400 b'.dat',
401 401 b'.sda',
402 402 )
403 403 # files that are "volatile" and might change between listing and streaming
404 404 #
405 405 # note: the ".nd" files are nodemap data and won't "change" but they might be
406 406 # deleted.
407 407 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
408 408
409 409 # some exception to the above matching
410 410 #
411 411 # XXX This is currently not in use because of issue6542
412 412 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
413 413
414 414
def is_revlog(f, kind, st):
    """Return the revlog type flags of ``f``, or None.

    None is returned for anything that is not a regular file; otherwise the
    result is whatever ``revlog_type`` reports for the file name.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
419 419
420 420
def revlog_type(f):
    """Classify file name ``f`` by extension.

    Returns ``FILEFLAGS_REVLOG_MAIN`` for a main revlog file, a
    ``FILETYPE_FILELOG_OTHER`` value (with ``FILEFLAGS_VOLATILE`` added for
    volatile extensions) for a secondary revlog file, and None otherwise.
    """
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None
433 433
434 434
435 435 # the file is part of changelog data
436 436 FILEFLAGS_CHANGELOG = 1 << 13
437 437 # the file is part of manifest data
438 438 FILEFLAGS_MANIFESTLOG = 1 << 12
439 439 # the file is part of filelog data
440 440 FILEFLAGS_FILELOG = 1 << 11
441 441 # file that are not directly part of a revlog
442 442 FILEFLAGS_OTHER = 1 << 10
443 443
444 444 # the main entry point for a revlog
445 445 FILEFLAGS_REVLOG_MAIN = 1 << 1
446 446 # a secondary file for a revlog
447 447 FILEFLAGS_REVLOG_OTHER = 1 << 0
448 448
449 449 # files that are "volatile" and might change between listing and streaming
450 450 FILEFLAGS_VOLATILE = 1 << 20
451 451
452 452 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
453 453 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
454 454 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
455 455 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
456 456 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
457 457 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
458 458 FILETYPE_OTHER = FILEFLAGS_OTHER
459 459
460 460
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represents some data in the store."""

    # path of the entry, as passed to the constructor
    _entry_path = attr.ib()
    # whether the underlying file is flagged as volatile
    _is_volatile = attr.ib(default=False)
    # size recorded when the entry was created, or None when not recorded
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """Return the list of StoreFile objects backing this entry."""
        return [
            StoreFile(
                unencoded_path=self._entry_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
489 489
490 490
491 491 @attr.s(slots=True, init=False)
492 492 class SimpleStoreEntry(BaseStoreEntry):
493 493 """A generic entry in the store"""
494 494
495 495 is_revlog = False
496 496
497 497
498 498 @attr.s(slots=True, init=False)
499 499 class RevlogStoreEntry(BaseStoreEntry):
500 500 """A revlog entry in the store"""
501 501
502 502 is_revlog = True
503 503 revlog_type = attr.ib(default=None)
504 504 target_id = attr.ib(default=None)
505 505 is_revlog_main = attr.ib(default=None)
506 506
507 507 def __init__(
508 508 self,
509 unencoded_path,
509 entry_path,
510 510 revlog_type,
511 511 target_id,
512 512 is_revlog_main=False,
513 513 is_volatile=False,
514 514 file_size=None,
515 515 ):
516 516 super().__init__(
517 unencoded_path=unencoded_path,
517 entry_path=entry_path,
518 518 is_volatile=is_volatile,
519 519 file_size=file_size,
520 520 )
521 521 self.revlog_type = revlog_type
522 522 self.target_id = target_id
523 523 self.is_revlog_main = is_revlog_main
524 524
525 525 def main_file_path(self):
526 526 """unencoded path of the main revlog file"""
527 return self.unencoded_path
527 return self._entry_path
528 528
529 529
530 530 @attr.s(slots=True)
531 531 class StoreFile:
532 532 """a file matching an entry"""
533 533
534 534 unencoded_path = attr.ib()
535 535 _file_size = attr.ib(default=False)
536 536 is_volatile = attr.ib(default=False)
537 537
538 538 def file_size(self, vfs):
539 539 if self._file_size is not None:
540 540 return self._file_size
541 541 try:
542 542 return vfs.stat(self.unencoded_path).st_size
543 543 except FileNotFoundError:
544 544 return 0
545 545
546 546
def _gather_revlog(files_data):
    """group files per revlog prefix

    This returns a two-level structure, sorted by revlog prefix. The top
    level key is the revlog prefix without extension, the second level is
    all the file "suffixes" that were seen for this revlog, with arbitrary
    file data as value.
    """
    revlogs = collections.defaultdict(dict)
    for u, value in files_data:
        name, ext = _split_revlog_ext(u)
        revlogs[name][ext] = value
    return sorted(revlogs.items())
559 559
560 560
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension

    Returns a `(prefix, extension)` pair; the extension keeps its leading
    separator. Names ending with one of REVLOG_FILES_LONG_EXT are cut at
    their last `-`, every other name at its last `.`.
    """
    separator = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(separator)
    return filename[:cut], filename[cut:]
569 569
570 570
571 571 def _ext_key(ext):
572 572 """a key to order revlog suffix
573 573
574 574 important to issue .i after other entry."""
575 575 # the only important part of this order is to keep the `.i` last.
576 576 if ext.endswith(b'.n'):
577 577 return (0, ext)
578 578 elif ext.endswith(b'.nd'):
579 579 return (10, ext)
580 580 elif ext.endswith(b'.d'):
581 581 return (20, ext)
582 582 elif ext.endswith(b'.i'):
583 583 return (50, ext)
584 584 else:
585 585 return (40, ext)
586 586
587 587
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        """set up the store rooted at `path`, accessed through `vfstype`"""
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # `rawvfs` accesses files by their on-disk name, `vfs` goes through
        # `encodedir` first.
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the path of store file `f`, after `encodedir` encoding"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        """list the revlog files found under `relpath`

        Returns a sorted list of `(unencoded, (revlog_type, size))` items,
        one per file for which `is_revlog` reports a type. Sub-directories
        are only visited when `recurse` is true.

        `undecodable` is accepted for compatibility with subclasses; it is
        not used here.
        """
        path = self.path
        if relpath:
            path += b'/' + relpath
        # +1 to also drop the `/` that follows the store path
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """return a changelog object reading from this store"""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """return a manifestlog object reading from this store"""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed.

        Note that `matcher` is not applied at this level."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                # drop the initial `data/` or `meta/` directory
                revlog_target_id = revlog.split(b'/', 1)[1]
                if rl_type == FILEFLAGS_MANIFESTLOG:
                    # also drop the `00manifest` file part, so the id is
                    # the tracked directory (with a trailing `/`), same as
                    # what fncachestore.datafiles computes.
                    tmp = revlog_target_id
                    revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        entry_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """yield entries for the files at the top level of the store

        Files that are neither changelog nor manifest files are yielded
        first (as SimpleStoreEntry), then the manifest files, then the
        changelog files (both as RevlogStoreEntry).
        """
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1

        # (keeping ordering so we get 00changelog.i last)
        def by_ext(item):
            return _ext_key(item[0])

        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                for ext, (t, s) in sorted(details.items(), key=by_ext):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        entry_path=u,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields store entries: everything from `datafiles` first, then
        everything from `topfiles`.

        if a matcher is passed, it is forwarded to `datafiles` so that only
        the storage files of matching tracked paths are reported.
        """
        # yield data files first
        yield from self.datafiles(matcher)
        yield from self.topfiles()

    def copylist(self):
        """return the list of paths to copy when copying the repository"""
        return _data

    def write(self, tr):
        """nothing to write for this kind of store"""
        pass

    def invalidatecaches(self):
        """no cache to invalidate for this kind of store"""
        pass

    def markremoved(self, fn):
        """nothing to record on removal for this kind of store"""
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
744 744
745 745
class encodedstore(basicstore):
    """store living in `<path>/store`, with `encodefilename` encoded names"""

    def __init__(self, path, vfstype):
        store_vfs = vfstype(path + b'/store')
        self.path = store_vfs.base
        self.createmode = _calcmode(store_vfs)
        store_vfs.createmode = self.createmode
        self.rawvfs = store_vfs
        self.vfs = vfsmod.filtervfs(store_vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """same as basicstore._walk, with the file names decoded

        A name that `decodefilename` cannot decode is appended to
        `undecodable` when that list is provided, and raises StorageError
        otherwise.
        """
        decoded = []
        for encoded_name, value in super()._walk(relpath, recurse):
            try:
                name = decodefilename(encoded_name)
            except KeyError:
                if undecodable is not None:
                    undecodable.append(encoded_name)
                    continue
                msg = _(b'undecodable revlog name %s') % encoded_name
                raise error.StorageError(msg)
            decoded.append((name, value))
        return decoded

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield the data entries accepted by `_match_tracked_entry`"""
        for entry in super().datafiles(undecodable=undecodable):
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        """return the path of store file `f`, with its name encoded"""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """return the list of paths to copy when copying the repository"""
        paths = [b'requires', b'00changelog.i']
        paths.extend(b'store/' + f for f in _data)
        return paths
785 785
786 786
class fncache:
    """in-memory view of the `fncache` file

    `entries` is the set of names read from the file (None until loaded),
    `addls` the set of names added since and not written yet.
    """

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        # make sure the file is closed even when a corrupted content makes
        # us raise below.
        try:
            self.entries = set()
            chunk = b''
            read_chunk = functools.partial(fp.read, fncache_chunksize)
            for c in iter(read_chunk, b''):
                chunk += c
                p = chunk.rfind(b'\n')
                if p == -1:
                    # no '\n' yet, maybe the entry is bigger than the
                    # chunksize, so let's keep iterating
                    continue
                # consume all the complete lines, keep the partial one
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]

            if chunk:
                msg = _(b"fncache does not ends with a newline")
                if warn:
                    warn(msg + b'\n')
                else:
                    raise error.Abort(
                        msg,
                        hint=_(
                            b"use 'hg debugrebuildfncache' to "
                            b"rebuild the fncache"
                        ),
                    )
            self._checkentries(fp, warn)
        finally:
            fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries

        Each empty line of `fp` is reported through `warn` when provided;
        otherwise the first one raises Abort."""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """write pending changes to the fncache file

        After a removal (`_dirty`) the whole file is rewritten, pending
        additions included. With additions only, they are appended to the
        file and the in-memory entries are dropped so that the next access
        reloads them.
        """
        # NOTE(review): the files below are opened with atomictemp=True and
        # only closed on success; presumably closing is what publishes the
        # new content, so no try/finally here -- confirm before changing.
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """make later `add(fn)` calls a no-op"""
        self._ignores.add(fn)

    def add(self, fn):
        """record `fn` as a pending addition, unless ignored or known"""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """forget `fn`; removing a name that is not tracked is a no-op"""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk, nothing to rewrite
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        # check the pending additions first, it can save loading the file
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
911 911
912 912
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy encoding file names and feeding the fncache on writes"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        """open `path` (encoded first), registering revlog files opened
        for writing under `data/` or `meta/` in the fncache"""
        encoded = self.encode(path)
        opened_for_write = mode not in (b'r', b'rb')
        if (
            opened_for_write
            and path.startswith((b'data/', b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skip_add = self.fncache.entries is None and self.vfs.exists(encoded)
            if skip_add and b'r+' in mode:
                # when appending to an existing file, if the file has size
                # zero, it should be considered as missing. Such zero-size
                # files are the result of truncation when a transaction is
                # aborted.
                if not self.vfs.stat(encoded).st_size:
                    skip_add = False
            if not skip_add:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        """join `path` to the vfs base, encoding it unless it is empty"""
        target = self.encode(path) if path else path
        return self.vfs.join(target)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith((b'data/', b'meta/')):
            self.fncache.add(path)
948 948
949 949
class fncachestore(basicstore):
    """store living in `<path>/store` that lists its data files in `fncache`"""

    def __init__(self, path, vfstype, dotencode):
        """`dotencode` selects the path encoding: `_pathencode` when true,
        `_plainhybridencode` otherwise"""
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """return the path of store file `f`, with its name encoded"""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """return the size of `path`, an already encoded name"""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield an entry for each revlog file listed in the fncache

        Only entries accepted by `_match_tracked_entry` for `matcher` are
        yielded. `undecodable` is not used by this implementation. The
        entries are built without a file size.
        """
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                entry = RevlogStoreEntry(
                    entry_path=f,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )
                if _match_tracked_entry(entry, matcher):
                    yield entry

    def copylist(self):
        """return the list of paths to copy when copying the repository"""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        """write the pending fncache changes"""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """drop the in-memory fncache content, pending additions included"""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """remove `fn` from the fncache"""
        self.fncache.remove(fn)

    def _exists(self, f):
        """check that `f` (unencoded name) can be stat'ed on disk"""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now