store: have custom init for entries class...
marmoute
r51367:c37450a5 default
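This changeset keeps the attrs-based field declarations on the store entry classes but passes `init=False` to `@attr.s`, so each class supplies its own hand-written `__init__`, with the subclass chaining to its parent through `super()`. A minimal standalone sketch of that pattern follows; the class and field names are illustrative only (not Mercurial's actual entry classes), and it assumes a plain `attr` installation rather than Mercurial's vendored copy.

    import attr  # Mercurial vendors this as `from .thirdparty import attr`


    @attr.s(slots=True, init=False)
    class Base:
        path = attr.ib()
        volatile = attr.ib(default=False)

        def __init__(self, path, volatile=False):
            # with init=False attrs generates no __init__, so set fields by hand
            self.path = path
            self.volatile = volatile


    @attr.s(slots=True, init=False)
    class Derived(Base):
        kind = attr.ib(default=None)

        def __init__(self, path, kind, volatile=False):
            # chain to the parent constructor, then set the subclass-only field
            super().__init__(path, volatile=volatile)
            self.kind = kind


    entry = Derived(b'data/foo.i', kind=1)
    assert entry.path == b'data/foo.i' and not entry.volatile

The explicit constructors let each entry class pick its own required arguments and defaults instead of relying on the attrs-generated positional signature, which is the point of this change.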
@@ -1,937 +1,963
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in cmap.items():
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in range(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
166 166 lambda s: b''.join(list(decode(s))),
167 167 )
168 168
169 169
170 170 _encodefname, _decodefname = _buildencodefun()
171 171
172 172
173 173 def encodefilename(s):
174 174 """
175 175 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
176 176 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
177 177 """
178 178 return _encodefname(encodedir(s))
179 179
180 180
181 181 def decodefilename(s):
182 182 """
183 183 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
184 184 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
185 185 """
186 186 return decodedir(_decodefname(s))
187 187
188 188
189 189 def _buildlowerencodefun():
190 190 """
191 191 >>> f = _buildlowerencodefun()
192 192 >>> f(b'nothing/special.txt')
193 193 'nothing/special.txt'
194 194 >>> f(b'HELLO')
195 195 'hello'
196 196 >>> f(b'hello:world?')
197 197 'hello~3aworld~3f'
198 198 >>> f(b'the\\x07quick\\xADshot')
199 199 'the~07quick~adshot'
200 200 """
201 201 xchr = pycompat.bytechr
202 202 cmap = {xchr(x): xchr(x) for x in range(127)}
203 203 for x in _reserved():
204 204 cmap[xchr(x)] = b"~%02x" % x
205 205 for x in range(ord(b"A"), ord(b"Z") + 1):
206 206 cmap[xchr(x)] = xchr(x).lower()
207 207
208 208 def lowerencode(s):
209 209 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
210 210
211 211 return lowerencode
212 212
213 213
214 214 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
216 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
269 269 _maxstorepathlen = 120
270 270 _dirprefixlen = 8
271 271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
274 274 def _hashencode(path, dotencode):
275 275 digest = hex(hashutil.sha1(path).digest())
276 276 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
277 277 parts = _auxencode(le, dotencode)
278 278 basename = parts[-1]
279 279 _root, ext = os.path.splitext(basename)
280 280 sdirs = []
281 281 sdirslen = 0
282 282 for p in parts[:-1]:
283 283 d = p[:_dirprefixlen]
284 284 if d[-1] in b'. ':
285 285 # Windows can't access dirs ending in period or space
286 286 d = d[:-1] + b'_'
287 287 if sdirslen == 0:
288 288 t = len(d)
289 289 else:
290 290 t = sdirslen + 1 + len(d)
291 291 if t > _maxshortdirslen:
292 292 break
293 293 sdirs.append(d)
294 294 sdirslen = t
295 295 dirs = b'/'.join(sdirs)
296 296 if len(dirs) > 0:
297 297 dirs += b'/'
298 298 res = b'dh/' + dirs + digest + ext
299 299 spaceleft = _maxstorepathlen - len(res)
300 300 if spaceleft > 0:
301 301 filler = basename[:spaceleft]
302 302 res = b'dh/' + dirs + filler + digest + ext
303 303 return res
304 304
305 305
306 306 def _hybridencode(path, dotencode):
307 307 """encodes path with a length limit
308 308
309 309 Encodes all paths that begin with 'data/', according to the following.
310 310
311 311 Default encoding (reversible):
312 312
313 313 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
314 314 characters are encoded as '~xx', where xx is the two digit hex code
315 315 of the character (see encodefilename).
316 316 Relevant path components consisting of Windows reserved filenames are
317 317 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
318 318
319 319 Hashed encoding (not reversible):
320 320
321 321 If the default-encoded path is longer than _maxstorepathlen, a
322 322 non-reversible hybrid hashing of the path is done instead.
323 323 This encoding uses up to _dirprefixlen characters of all directory
324 324 levels of the lowerencoded path, but not more levels than can fit into
325 325 _maxshortdirslen.
326 326 Then follows the filler followed by the sha digest of the full path.
327 327 The filler is the beginning of the basename of the lowerencoded path
328 328 (the basename is everything after the last path separator). The filler
329 329 is as long as possible, filling in characters from the basename until
330 330 the encoded path has _maxstorepathlen characters (or all chars of the
331 331 basename have been taken).
332 332 The extension (e.g. '.i' or '.d') is preserved.
333 333
334 334 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
335 335 encoding was used.
336 336 """
337 337 path = encodedir(path)
338 338 ef = _encodefname(path).split(b'/')
339 339 res = b'/'.join(_auxencode(ef, dotencode))
340 340 if len(res) > _maxstorepathlen:
341 341 res = _hashencode(path, dotencode)
342 342 return res
343 343
344 344
345 345 def _pathencode(path):
346 346 de = encodedir(path)
347 347 if len(path) > _maxstorepathlen:
348 348 return _hashencode(de, True)
349 349 ef = _encodefname(de).split(b'/')
350 350 res = b'/'.join(_auxencode(ef, True))
351 351 if len(res) > _maxstorepathlen:
352 352 return _hashencode(de, True)
353 353 return res
354 354
355 355
356 356 _pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
359 359 def _plainhybridencode(f):
360 360 return _hybridencode(f, False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
375 375 _data = [
376 376 b'bookmarks',
377 377 b'narrowspec',
378 378 b'data',
379 379 b'meta',
380 380 b'00manifest.d',
381 381 b'00manifest.i',
382 382 b'00changelog.d',
383 383 b'00changelog.i',
384 384 b'phaseroots',
385 385 b'obsstore',
386 386 b'requires',
387 387 ]
388 388
389 389 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 390 REVLOG_FILES_OTHER_EXT = (
391 391 b'.idx',
392 392 b'.d',
393 393 b'.dat',
394 394 b'.n',
395 395 b'.nd',
396 396 b'.sda',
397 397 )
398 398 # files that are "volatile" and might change between listing and streaming
399 399 #
400 400 # note: the ".nd" files are nodemap data and won't "change" but they might be
401 401 # deleted.
402 402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
403 403
404 404 # some exception to the above matching
405 405 #
406 406 # XXX This is currently not in use because of issue6542
407 407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408 408
409 409
410 410 def is_revlog(f, kind, st):
411 411 if kind != stat.S_IFREG:
412 412 return None
413 413 return revlog_type(f)
414 414
415 415
416 416 def revlog_type(f):
417 417 # XXX we need to filter `undo.` created by the transaction here, however
418 418 # being naive about it also filters revlogs for `undo.*` files, leading to
419 419 # issue6542. So we no longer use EXCLUDED.
420 420 if f.endswith(REVLOG_FILES_MAIN_EXT):
421 421 return FILEFLAGS_REVLOG_MAIN
422 422 elif f.endswith(REVLOG_FILES_OTHER_EXT):
423 423 t = FILETYPE_FILELOG_OTHER
424 424 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
425 425 t |= FILEFLAGS_VOLATILE
426 426 return t
427 427 return None
428 428
429 429
430 430 # the file is part of changelog data
431 431 FILEFLAGS_CHANGELOG = 1 << 13
432 432 # the file is part of manifest data
433 433 FILEFLAGS_MANIFESTLOG = 1 << 12
434 434 # the file is part of filelog data
435 435 FILEFLAGS_FILELOG = 1 << 11
436 436 # files that are not directly part of a revlog
437 437 FILEFLAGS_OTHER = 1 << 10
438 438
439 439 # the main entry point for a revlog
440 440 FILEFLAGS_REVLOG_MAIN = 1 << 1
441 441 # a secondary file for a revlog
442 442 FILEFLAGS_REVLOG_OTHER = 1 << 0
443 443
444 444 # files that are "volatile" and might change between listing and streaming
445 445 FILEFLAGS_VOLATILE = 1 << 20
446 446
447 447 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
448 448 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
449 449 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
450 450 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
451 451 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
452 452 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
453 453 FILETYPE_OTHER = FILEFLAGS_OTHER
454 454
455 455
456 @attr.s(slots=True)
456 @attr.s(slots=True, init=False)
457 457 class BaseStoreEntry:
458 458 """An entry in the store
459 459
460 460 This is returned by `store.walk` and represent some data in the store."""
461 461
462 462 unencoded_path = attr.ib()
463 463 is_volatile = attr.ib(default=False)
464 464 file_size = attr.ib(default=None)
465 465
466 def __init__(
467 self,
468 unencoded_path,
469 is_volatile=False,
470 file_size=None,
471 ):
472 self.unencoded_path = unencoded_path
473 self.is_volatile = is_volatile
474 self.file_size = file_size
475
466 476 def files(self):
467 477 return [
468 478 StoreFile(
469 479 unencoded_path=self.unencoded_path,
470 480 file_size=self.file_size,
471 481 is_volatile=self.is_volatile,
472 482 )
473 483 ]
474 484
475 485
476 @attr.s(slots=True)
486 @attr.s(slots=True, init=False)
477 487 class SimpleStoreEntry(BaseStoreEntry):
478 488 """A generic entry in the store"""
479 489
480 490 is_revlog = False
481 491
482 492
483 @attr.s(slots=True)
493 @attr.s(slots=True, init=False)
484 494 class RevlogStoreEntry(BaseStoreEntry):
485 495 """A revlog entry in the store"""
486 496
487 497 is_revlog = True
488 498 revlog_type = attr.ib(default=None)
489 499 is_revlog_main = attr.ib(default=None)
490 500
501 def __init__(
502 self,
503 unencoded_path,
504 revlog_type,
505 is_revlog_main=False,
506 is_volatile=False,
507 file_size=None,
508 ):
509 super().__init__(
510 unencoded_path=unencoded_path,
511 is_volatile=is_volatile,
512 file_size=file_size,
513 )
514 self.revlog_type = revlog_type
515 self.is_revlog_main = is_revlog_main
516
491 517
492 518 @attr.s(slots=True)
493 519 class StoreFile:
494 520 """a file matching an entry"""
495 521
496 522 unencoded_path = attr.ib()
497 523 file_size = attr.ib()
498 524 is_volatile = attr.ib(default=False)
499 525
500 526
501 527 class basicstore:
502 528 '''base class for local repository stores'''
503 529
504 530 def __init__(self, path, vfstype):
505 531 vfs = vfstype(path)
506 532 self.path = vfs.base
507 533 self.createmode = _calcmode(vfs)
508 534 vfs.createmode = self.createmode
509 535 self.rawvfs = vfs
510 536 self.vfs = vfsmod.filtervfs(vfs, encodedir)
511 537 self.opener = self.vfs
512 538
513 539 def join(self, f):
514 540 return self.path + b'/' + encodedir(f)
515 541
516 542 def _walk(self, relpath, recurse):
517 543 '''yields (revlog_type, unencoded, size)'''
518 544 path = self.path
519 545 if relpath:
520 546 path += b'/' + relpath
521 547 striplen = len(self.path) + 1
522 548 l = []
523 549 if self.rawvfs.isdir(path):
524 550 visit = [path]
525 551 readdir = self.rawvfs.readdir
526 552 while visit:
527 553 p = visit.pop()
528 554 for f, kind, st in readdir(p, stat=True):
529 555 fp = p + b'/' + f
530 556 rl_type = is_revlog(f, kind, st)
531 557 if rl_type is not None:
532 558 n = util.pconvert(fp[striplen:])
533 559 l.append((rl_type, decodedir(n), st.st_size))
534 560 elif kind == stat.S_IFDIR and recurse:
535 561 visit.append(fp)
536 562 l.sort()
537 563 return l
538 564
539 565 def changelog(self, trypending, concurrencychecker=None):
540 566 return changelog.changelog(
541 567 self.vfs,
542 568 trypending=trypending,
543 569 concurrencychecker=concurrencychecker,
544 570 )
545 571
546 572 def manifestlog(self, repo, storenarrowmatch):
547 573 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
548 574 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
549 575
550 576 def datafiles(
551 577 self, matcher=None, undecodable=None
552 578 ) -> Generator[BaseStoreEntry, None, None]:
553 579 """Like walk, but excluding the changelog and root manifest.
554 580
555 581 When [undecodable] is None, revlogs names that can't be
556 582 decoded cause an exception. When it is provided, it should
557 583 be a list and the filenames that can't be decoded are added
558 584 to it instead. This is very rarely needed."""
559 585 files = self._walk(b'data', True) + self._walk(b'meta', True)
560 586 for (t, u, s) in files:
561 587 if t is not None:
562 588 yield RevlogStoreEntry(
563 589 unencoded_path=u,
564 590 revlog_type=FILEFLAGS_FILELOG,
565 591 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
566 592 is_volatile=bool(t & FILEFLAGS_VOLATILE),
567 593 file_size=s,
568 594 )
569 595
570 596 def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
571 597 # yield manifest before changelog
572 598 files = reversed(self._walk(b'', False))
573 599 for (t, u, s) in files:
574 600 if u.startswith(b'00changelog'):
575 601 yield RevlogStoreEntry(
576 602 unencoded_path=u,
577 603 revlog_type=FILEFLAGS_CHANGELOG,
578 604 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
579 605 is_volatile=bool(t & FILEFLAGS_VOLATILE),
580 606 file_size=s,
581 607 )
582 608 elif u.startswith(b'00manifest'):
583 609 yield RevlogStoreEntry(
584 610 unencoded_path=u,
585 611 revlog_type=FILEFLAGS_MANIFESTLOG,
586 612 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
587 613 is_volatile=bool(t & FILEFLAGS_VOLATILE),
588 614 file_size=s,
589 615 )
590 616 else:
591 617 yield SimpleStoreEntry(
592 618 unencoded_path=u,
593 619 is_volatile=bool(t & FILEFLAGS_VOLATILE),
594 620 file_size=s,
595 621 )
596 622
597 623 def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
598 624 """return files related to data storage (ie: revlogs)
599 625
600 626 yields (file_type, unencoded, size)
601 627
602 628 if a matcher is passed, only storage files of tracked paths
603 629 matched by the matcher are yielded
604 630 """
605 631 # yield data files first
606 632 for x in self.datafiles(matcher):
607 633 yield x
608 634 for x in self.topfiles():
609 635 yield x
610 636
611 637 def copylist(self):
612 638 return _data
613 639
614 640 def write(self, tr):
615 641 pass
616 642
617 643 def invalidatecaches(self):
618 644 pass
619 645
620 646 def markremoved(self, fn):
621 647 pass
622 648
623 649 def __contains__(self, path):
624 650 '''Checks if the store contains path'''
625 651 path = b"/".join((b"data", path))
626 652 # file?
627 653 if self.vfs.exists(path + b".i"):
628 654 return True
629 655 # dir?
630 656 if not path.endswith(b"/"):
631 657 path = path + b"/"
632 658 return self.vfs.exists(path)
633 659
634 660
635 661 class encodedstore(basicstore):
636 662 def __init__(self, path, vfstype):
637 663 vfs = vfstype(path + b'/store')
638 664 self.path = vfs.base
639 665 self.createmode = _calcmode(vfs)
640 666 vfs.createmode = self.createmode
641 667 self.rawvfs = vfs
642 668 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
643 669 self.opener = self.vfs
644 670
645 671 # note: topfiles would also need a decode phase. It is just that in
646 672 # practice we do not have any file outside of `data/` that needs encoding.
647 673 # However that might change so we should probably add a test and encoding
648 674 # decoding for it too. see issue6548
649 675
650 676 def datafiles(
651 677 self, matcher=None, undecodable=None
652 678 ) -> Generator[BaseStoreEntry, None, None]:
653 679 for entry in super(encodedstore, self).datafiles():
654 680 try:
655 681 f1 = entry.unencoded_path
656 682 f2 = decodefilename(f1)
657 683 except KeyError:
658 684 if undecodable is None:
659 685 msg = _(b'undecodable revlog name %s') % f1
660 686 raise error.StorageError(msg)
661 687 else:
662 688 undecodable.append(f1)
663 689 continue
664 690 if not _matchtrackedpath(f2, matcher):
665 691 continue
666 692 entry.unencoded_path = f2
667 693 yield entry
668 694
669 695 def join(self, f):
670 696 return self.path + b'/' + encodefilename(f)
671 697
672 698 def copylist(self):
673 699 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
674 700
675 701
676 702 class fncache:
677 703 # the filename used to be partially encoded
678 704 # hence the encodedir/decodedir dance
679 705 def __init__(self, vfs):
680 706 self.vfs = vfs
681 707 self._ignores = set()
682 708 self.entries = None
683 709 self._dirty = False
684 710 # set of new additions to fncache
685 711 self.addls = set()
686 712
687 713 def ensureloaded(self, warn=None):
688 714 """read the fncache file if not already read.
689 715
690 716 If the file on disk is corrupted, raise. If warn is provided,
691 717 warn and keep going instead."""
692 718 if self.entries is None:
693 719 self._load(warn)
694 720
695 721 def _load(self, warn=None):
696 722 '''fill the entries from the fncache file'''
697 723 self._dirty = False
698 724 try:
699 725 fp = self.vfs(b'fncache', mode=b'rb')
700 726 except IOError:
701 727 # skip nonexistent file
702 728 self.entries = set()
703 729 return
704 730
705 731 self.entries = set()
706 732 chunk = b''
707 733 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
708 734 chunk += c
709 735 try:
710 736 p = chunk.rindex(b'\n')
711 737 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
712 738 chunk = chunk[p + 1 :]
713 739 except ValueError:
714 740 # substring '\n' not found, maybe the entry is bigger than the
715 741 # chunksize, so let's keep iterating
716 742 pass
717 743
718 744 if chunk:
719 745 msg = _(b"fncache does not ends with a newline")
720 746 if warn:
721 747 warn(msg + b'\n')
722 748 else:
723 749 raise error.Abort(
724 750 msg,
725 751 hint=_(
726 752 b"use 'hg debugrebuildfncache' to "
727 753 b"rebuild the fncache"
728 754 ),
729 755 )
730 756 self._checkentries(fp, warn)
731 757 fp.close()
732 758
733 759 def _checkentries(self, fp, warn):
734 760 """make sure there is no empty string in entries"""
735 761 if b'' in self.entries:
736 762 fp.seek(0)
737 763 for n, line in enumerate(fp):
738 764 if not line.rstrip(b'\n'):
739 765 t = _(b'invalid entry in fncache, line %d') % (n + 1)
740 766 if warn:
741 767 warn(t + b'\n')
742 768 else:
743 769 raise error.Abort(t)
744 770
745 771 def write(self, tr):
746 772 if self._dirty:
747 773 assert self.entries is not None
748 774 self.entries = self.entries | self.addls
749 775 self.addls = set()
750 776 tr.addbackup(b'fncache')
751 777 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
752 778 if self.entries:
753 779 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
754 780 fp.close()
755 781 self._dirty = False
756 782 if self.addls:
757 783 # if we have just new entries, let's append them to the fncache
758 784 tr.addbackup(b'fncache')
759 785 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
760 786 if self.addls:
761 787 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
762 788 fp.close()
763 789 self.entries = None
764 790 self.addls = set()
765 791
766 792 def addignore(self, fn):
767 793 self._ignores.add(fn)
768 794
769 795 def add(self, fn):
770 796 if fn in self._ignores:
771 797 return
772 798 if self.entries is None:
773 799 self._load()
774 800 if fn not in self.entries:
775 801 self.addls.add(fn)
776 802
777 803 def remove(self, fn):
778 804 if self.entries is None:
779 805 self._load()
780 806 if fn in self.addls:
781 807 self.addls.remove(fn)
782 808 return
783 809 try:
784 810 self.entries.remove(fn)
785 811 self._dirty = True
786 812 except KeyError:
787 813 pass
788 814
789 815 def __contains__(self, fn):
790 816 if fn in self.addls:
791 817 return True
792 818 if self.entries is None:
793 819 self._load()
794 820 return fn in self.entries
795 821
796 822 def __iter__(self):
797 823 if self.entries is None:
798 824 self._load()
799 825 return iter(self.entries | self.addls)
800 826
801 827
802 828 class _fncachevfs(vfsmod.proxyvfs):
803 829 def __init__(self, vfs, fnc, encode):
804 830 vfsmod.proxyvfs.__init__(self, vfs)
805 831 self.fncache = fnc
806 832 self.encode = encode
807 833
808 834 def __call__(self, path, mode=b'r', *args, **kw):
809 835 encoded = self.encode(path)
810 836 if (
811 837 mode not in (b'r', b'rb')
812 838 and (path.startswith(b'data/') or path.startswith(b'meta/'))
813 839 and revlog_type(path) is not None
814 840 ):
815 841 # do not trigger a fncache load when adding a file that already is
816 842 # known to exist.
817 843 notload = self.fncache.entries is None and self.vfs.exists(encoded)
818 844 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
819 845 # when appending to an existing file, if the file has size zero,
820 846 # it should be considered as missing. Such zero-size files are
821 847 # the result of truncation when a transaction is aborted.
822 848 notload = False
823 849 if not notload:
824 850 self.fncache.add(path)
825 851 return self.vfs(encoded, mode, *args, **kw)
826 852
827 853 def join(self, path):
828 854 if path:
829 855 return self.vfs.join(self.encode(path))
830 856 else:
831 857 return self.vfs.join(path)
832 858
833 859 def register_file(self, path):
834 860 """generic hook point to lets fncache steer its stew"""
835 861 if path.startswith(b'data/') or path.startswith(b'meta/'):
836 862 self.fncache.add(path)
837 863
838 864
839 865 class fncachestore(basicstore):
840 866 def __init__(self, path, vfstype, dotencode):
841 867 if dotencode:
842 868 encode = _pathencode
843 869 else:
844 870 encode = _plainhybridencode
845 871 self.encode = encode
846 872 vfs = vfstype(path + b'/store')
847 873 self.path = vfs.base
848 874 self.pathsep = self.path + b'/'
849 875 self.createmode = _calcmode(vfs)
850 876 vfs.createmode = self.createmode
851 877 self.rawvfs = vfs
852 878 fnc = fncache(vfs)
853 879 self.fncache = fnc
854 880 self.vfs = _fncachevfs(vfs, fnc, encode)
855 881 self.opener = self.vfs
856 882
857 883 def join(self, f):
858 884 return self.pathsep + self.encode(f)
859 885
860 886 def getsize(self, path):
861 887 return self.rawvfs.stat(path).st_size
862 888
863 889 def datafiles(
864 890 self, matcher=None, undecodable=None
865 891 ) -> Generator[BaseStoreEntry, None, None]:
866 892 for f in sorted(self.fncache):
867 893 if not _matchtrackedpath(f, matcher):
868 894 continue
869 895 ef = self.encode(f)
870 896 t = revlog_type(f)
871 897 if t is None:
872 898 # Note: this should not be in the fncache then…
873 899 #
874 900 # However the fncache might contain such files added by
875 901 # previous versions of Mercurial.
876 902 continue
877 903 try:
878 904 yield RevlogStoreEntry(
879 905 unencoded_path=f,
880 906 revlog_type=FILEFLAGS_FILELOG,
881 907 is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
882 908 is_volatile=bool(t & FILEFLAGS_VOLATILE),
883 909 file_size=self.getsize(ef),
884 910 )
885 911 except FileNotFoundError:
886 912 pass
887 913
888 914 def copylist(self):
889 915 d = (
890 916 b'bookmarks',
891 917 b'narrowspec',
892 918 b'data',
893 919 b'meta',
894 920 b'dh',
895 921 b'fncache',
896 922 b'phaseroots',
897 923 b'obsstore',
898 924 b'00manifest.d',
899 925 b'00manifest.i',
900 926 b'00changelog.d',
901 927 b'00changelog.i',
902 928 b'requires',
903 929 )
904 930 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
905 931
906 932 def write(self, tr):
907 933 self.fncache.write(tr)
908 934
909 935 def invalidatecaches(self):
910 936 self.fncache.entries = None
911 937 self.fncache.addls = set()
912 938
913 939 def markremoved(self, fn):
914 940 self.fncache.remove(fn)
915 941
916 942 def _exists(self, f):
917 943 ef = self.encode(f)
918 944 try:
919 945 self.getsize(ef)
920 946 return True
921 947 except FileNotFoundError:
922 948 return False
923 949
924 950 def __contains__(self, path):
925 951 '''Checks if the store contains path'''
926 952 path = b"/".join((b"data", path))
927 953 # check for files (exact match)
928 954 e = path + b'.i'
929 955 if e in self.fncache and self._exists(e):
930 956 return True
931 957 # now check for directories (prefix match)
932 958 if not path.endswith(b'/'):
933 959 path += b'/'
934 960 for e in self.fncache:
935 961 if e.startswith(path) and self._exists(e):
936 962 return True
937 963 return False
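For context on how these entries are consumed: `store.walk()` and `datafiles()` yield the entry objects defined above, and callers expand each one into concrete files via `entry.files()`, which returns `StoreFile` objects carrying `unencoded_path`, `file_size`, and `is_volatile`. A hedged consumption sketch, assuming `repo.store` is an instance of one of the store classes in this file (the helper name is illustrative, not part of this changeset):

    def list_store_files(repo, matcher=None):
        # Illustrative helper: walk the store and flatten each entry into
        # (path, size, volatile) tuples.
        for entry in repo.store.walk(matcher):
            for f in entry.files():
                yield f.unencoded_path, f.file_size, f.is_volatile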