##// END OF EJS Templates
store: only access file_size information through the file object...
marmoute -
r51368:7d4d2a16 default
parent child Browse files
Show More
@@ -1,963 +1,963 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how much bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
def _matchtrackedpath(path, matcher):
    """parses a fncache entry and returns whether the entry is tracking a path
    matched by matcher or not.

    If matcher is None, returns True"""

    if matcher is None:
        return True
    # fncache entries are stored encoded; decode before matching
    path = decodedir(path)
    if path.startswith(b'data/'):
        # filelog: strip 'data/' prefix and '.i' suffix to get the tracked path
        return matcher(path[len(b'data/') : -len(b'.i')])
    elif path.startswith(b'meta/'):
        # tree manifest: strip 'meta/' prefix and '/00manifest.i' suffix
        return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])

    raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
# prefer the C implementation from the parsers extension when available
encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: an unencoded path contains no '.hg/' component at all
    if b".hg/" not in path:
        return path
    # undo _encodedir; order matters so that '.hg.hg/' is handled last
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encoding map: identity for plain ascii, '~xx' for reserved bytes,
    # '_x' for uppercase letters and the escape char itself
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decoding map is the exact inverse of cmap
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1 to 3 bytes long; try the shortest first
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
168 168
169 169
_encodefname, _decodefname = _buildencodefun()
171 171
172 172
def encodefilename(s):
    """
    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    # dir-encode first so reserved suffixes are escaped before char encoding
    return _encodefname(encodedir(s))
179 179
180 180
def decodefilename(s):
    """
    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    # inverse of encodefilename: char-decode, then undo the dir encoding
    return decodedir(_decodefname(s))
187 187
188 188
def _buildlowerencodefun():
    """
    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity for plain ascii, '~xx' for reserved bytes, lowercase for A-Z
    # (not reversible: 'A' and 'a' both map to 'a')
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212 212
213 213
# prefer the C implementation from the parsers extension when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
216 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
269 269 _maxstorepathlen = 120
270 270 _dirprefixlen = 8
271 271 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
def _hashencode(path, dotencode):
    """non-reversible encoding of an over-long store path

    The result is 'dh/' + up to _dirprefixlen chars of each directory
    level + a filler from the basename + the sha1 digest of the full
    path + the original extension, capped at _maxstorepathlen.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with as much of the basename as still fits
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304 304
305 305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # fall back to the non-reversible hashed form
        res = _hashencode(path, dotencode)
    return res
343 343
344 344
def _pathencode(path):
    """dotencode variant of _hybridencode, with an early length check

    If the raw path already exceeds _maxstorepathlen we can skip the
    (more expensive) reversible encoding entirely, since the encoded
    form can only be longer.
    """
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
354 354
355 355
# prefer the C implementation from the parsers extension when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
def _plainhybridencode(f):
    """hybrid-encode *f* without dot-encoding (dotencode=False stores)"""
    return _hybridencode(f, False)
361 361
362 362
def _calcmode(vfs):
    """return the file mode new store files should be created with

    Returns None when chmod calls would be useless (the umask already
    produces the stored mode, or the store directory cannot be stat'ed).
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
373 373
374 374
375 375 _data = [
376 376 b'bookmarks',
377 377 b'narrowspec',
378 378 b'data',
379 379 b'meta',
380 380 b'00manifest.d',
381 381 b'00manifest.i',
382 382 b'00changelog.d',
383 383 b'00changelog.i',
384 384 b'phaseroots',
385 385 b'obsstore',
386 386 b'requires',
387 387 ]
388 388
389 389 REVLOG_FILES_MAIN_EXT = (b'.i',)
390 390 REVLOG_FILES_OTHER_EXT = (
391 391 b'.idx',
392 392 b'.d',
393 393 b'.dat',
394 394 b'.n',
395 395 b'.nd',
396 396 b'.sda',
397 397 )
398 398 # files that are "volatile" and might change between listing and streaming
399 399 #
400 400 # note: the ".nd" file are nodemap data and won't "change" but they might be
401 401 # deleted.
402 402 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
403 403
404 404 # some exception to the above matching
405 405 #
406 406 # XXX This is currently not in use because of issue6542
407 407 EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408 408
409 409
def is_revlog(f, kind, st):
    """return the revlog type of regular file *f*, or None

    Non-regular files (directories, symlinks, ...) are never revlogs.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
414 414
415 415
def revlog_type(f):
    """classify filename *f* as a revlog file, returning FILEFLAGS_* or None"""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None
428 428
429 429
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# combined file types: one category bit plus one revlog-role bit
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
454 454
455 455
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    unencoded_path = attr.ib()
    is_volatile = attr.ib(default=False)
    # file size is an implementation detail; access it through files()
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self.is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of StoreFile making up this entry"""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self.is_volatile,
            )
        ]
484 484
485 485
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False
491 491
492 492
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* category values
    revlog_type = attr.ib(default=None)
    # True for the '.i' entry point, False for secondary files
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
516 516
517 517
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    file_size = attr.ib()
    is_volatile = attr.ib(default=False)
525 525
526 526
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # self.vfs encodes directory names on the fly (see _encodedir)
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the store-absolute (encoded) path for *f*"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((rl_type, decodedir(n), st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        files = self._walk(b'data', True) + self._walk(b'meta', True)
        for (t, u, s) in files:
            if t is not None:
                yield RevlogStoreEntry(
                    unencoded_path=u,
                    revlog_type=FILEFLAGS_FILELOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        # yield manifest before changelog
        files = reversed(self._walk(b'', False))
        for (t, u, s) in files:
            if u.startswith(b'00changelog'):
                yield RevlogStoreEntry(
                    unencoded_path=u,
                    revlog_type=FILEFLAGS_CHANGELOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
            elif u.startswith(b'00manifest'):
                yield RevlogStoreEntry(
                    unencoded_path=u,
                    revlog_type=FILEFLAGS_MANIFESTLOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
659 659
660 660
class encodedstore(basicstore):
    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # unlike basicstore, filenames (not just dirs) are encoded here
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            # expose the decoded name to callers
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
700 700
701 701
class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        # None means "not loaded yet"; loaded lazily by _load()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        # read in bounded chunks to avoid loading huge fncache files at once
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks a final newline -> corrupt
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        if self._dirty:
            # full rewrite: merge pending additions and write everything
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # not yet on disk: dropping the pending addition is enough
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
826 826
827 827
class _fncachevfs(vfsmod.proxyvfs):
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
863 863
864 864
class fncachestore(basicstore):
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # vfs wrapper that records every written data/meta file in fncache
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        for f in sorted(self.fncache):
            if not _matchtrackedpath(f, matcher):
                continue
            ef = self.encode(f)
            t = revlog_type(f)
            if t is None:
                # Note: this should not be in the fncache then…
                #
                # However the fncache might contains such file added by
                # previous version of Mercurial.
                continue
            try:
                yield RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=FILEFLAGS_FILELOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=self.getsize(ef),
                )
            except FileNotFoundError:
                # the file vanished between fncache listing and stat
                pass

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now