##// END OF EJS Templates
store: add logic to group revlog files together...
marmoute -
r51372:5217e363 default
parent child Browse files
Show More
@@ -1,971 +1,1004
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8
8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
# C implementation of the encoding helpers; pure-Python fallbacks below
parsers = policy.importmod('parsers')
# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
# prefer the C implementation when the parsers module provides one
encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
def decodedir(path):
    """Reverse the directory escaping performed by ``_encodedir``.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing in this path was escaped
    if b".hg/" not in path:
        return path
    # undo in the opposite order of _encodedir
    for escaped, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(escaped, plain)
    return path
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
def _buildencodefun():
    """Build the (encode, decode) pair for reversible filename encoding.

    Uppercase ascii becomes '_' + lowercase, '_' itself is doubled, and
    reserved bytes become '~XX' hex escapes; everything else is unchanged.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'  # the escape character (must itself be escaped)
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # start from the identity map for plain ascii bytes...
    cmap = {x: x for x in asciistr}
    # ...escape reserved bytes as '~XX'...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ...and escape capitals (plus '_' itself) with the '_' prefix
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse map: escaped sequence (1-3 bytes) -> original byte
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # at each position, greedily try 1-, 2- then 3-byte sequences;
        # an unrecognized sequence means corrupt input
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
168 168
169 169
# module-level reversible filename encode/decode pair
_encodefname, _decodefname = _buildencodefun()
171 171
172 172
def encodefilename(s):
    """Apply directory escaping then reversible filename encoding to ``s``.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
179 179
180 180
def decodefilename(s):
    """Reverse :func:`encodefilename`.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    name = _decodefname(s)
    return decodedir(name)
187 187
188 188
def _buildlowerencodefun():
    """Build the non-reversible lowercasing encoder used by hashed paths.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    # identity for plain ascii bytes
    cmap = {xchr(x): xchr(x) for x in range(127)}
    # reserved bytes get a '~XX' hex escape
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # capitals simply fold to lowercase (this loses information)
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212 212
213 213
# prefer the C implementation when available
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
219 219
220 220
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    Note: ``path`` is a list of segments which is modified *in place* and
    also returned.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # escape a leading period or space ('.foo' -> '~2efoo')
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            # a reserved name only matters when it is the part before the
            # first dot (the "basename" Windows looks at)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
267 267
268 268
# maximum length of an encoded store path before hashed encoding kicks in
_maxstorepathlen = 120
# how many leading characters of each directory level survive hashing
_dirprefixlen = 8
# overall length budget for the shortened directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
def _hashencode(path, dotencode):
    """Encode ``path`` into the non-reversible hashed ('dh/') form.

    Used when the default encoding would exceed ``_maxstorepathlen``; see
    ``_hybridencode`` for the complete description of the scheme.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    # keep up to _dirprefixlen chars per directory level, stopping once the
    # joined result would exceed _maxshortdirslen
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # pad with as much of the original basename as fits the length budget
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304 304
305 305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long: fall back to the non-reversible hashed form
        res = _hashencode(path, dotencode)
    return res
343 343
344 344
def _pathencode(path):
    """dotencode variant of the hybrid encoding (pure-Python fallback).

    Short-circuits to hashed encoding when the raw path already exceeds
    the limit, since the default encoding can only grow it.
    """
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res


# prefer the C implementation when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
def _plainhybridencode(f):
    # hybrid encoding without dotencode (used when the repo lacks the
    # 'dotencode' requirement)
    return _hybridencode(f, False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
# store-relative paths copied when cloning without streaming
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

# extension of the revlog entry point (the index)
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the auxiliary revlog files
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
408 415
409 416
def is_revlog(f, kind, st):
    """Return the revlog type flags for directory entry ``f``, or None.

    Only regular files can be revlogs; directories, symlinks and other
    entry kinds are rejected outright.
    """
    if kind == stat.S_IFREG:
        return revlog_type(f)
    return None
414 421
415 422
def revlog_type(f):
    """Classify filename ``f`` as a revlog component, or return None.

    Returns FILEFLAGS_REVLOG_MAIN for index files, FILETYPE_FILELOG_OTHER
    (possibly with FILEFLAGS_VOLATILE) for auxiliary files, None otherwise.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if not f.endswith(REVLOG_FILES_OTHER_EXT):
        return None
    flags = FILETYPE_FILELOG_OTHER
    if f.endswith(REVLOG_FILES_VOLATILE_EXT):
        flags |= FILEFLAGS_VOLATILE
    return flags
428 435
429 436
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# convenient combinations of the category and role bits above
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
454 461
455 462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    unencoded_path = attr.ib()  # store-relative path before filename encoding
    _is_volatile = attr.ib(default=False)  # may change/vanish while streaming
    _file_size = attr.ib(default=None)  # size in bytes, or None to stat later

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of StoreFile backing this entry on disk"""
        # a base entry maps to exactly one on-disk file
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
484 491
485 492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # class-level marker used by callers to distinguish from revlog entries
    is_revlog = False
491 498
492 499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    revlog_type = attr.ib(default=None)  # FILEFLAGS_* category of the revlog
    is_revlog_main = attr.ib(default=None)  # True for the '.i' entry point

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.is_revlog_main = is_revlog_main
516 523
517 524
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # NOTE(review): default is False but file_size() only guards against
    # None — a default-constructed instance would return False; in practice
    # callers always pass file_size explicitly. TODO confirm intent.
    _file_size = attr.ib(default=False)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return the file size, statting through ``vfs`` if unknown"""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            # volatile files may disappear between listing and statting
            return 0
533 540
534 541
542 def _gather_revlog(files_data):
543 """group files per revlog prefix
544
545 The returns a two level nested dict. The top level key is the revlog prefix
546 without extension, the second level is all the file "suffix" that were
547 seen for this revlog and arbitrary file data as value.
548 """
549 revlogs = collections.defaultdict(dict)
550 for u, value in files_data:
551 name, ext = _split_revlog_ext(u)
552 revlogs[name][ext] = value
553 return sorted(revlogs.items())
554
555
def _split_revlog_ext(filename):
    """Split a revlog filename into its stable prefix and variable suffix.

    Extensions in REVLOG_FILES_LONG_EXT use a ``-SOMELONGIDHASH.ext`` form,
    so for those the split point is the last '-'; for everything else it is
    the last '.'.
    """
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(sep)
    return filename[:cut], filename[cut:]
564
565
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # reads go through the directory-escaping filter
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """return the filesystem path for store-relative path ``f``"""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded_path, (revlog_type, size))
        pairs for the revlog files under ``relpath``'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            # manual stack-based traversal (no recursion)
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """instantiate the repository changelog on top of this store"""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """instantiate the repository manifest log on top of this store"""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        files = self._walk(b'data', True) + self._walk(b'meta', True)
        # drop entries that are not revlog files
        files = (f for f in files if f[1][0] is not None)
        # group the files per revlog so related files are yielded together
        for revlog, details in _gather_revlog(files):
            for ext, (t, s) in sorted(details.items()):
                u = revlog + ext
                yield RevlogStoreEntry(
                    unencoded_path=u,
                    revlog_type=FILEFLAGS_FILELOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """yield entries for files at the root of the store"""
        # yield manifest before changelog
        files = self._walk(b'', False)
        # key is (type, path) (keeping ordering so we get 00changelog.i last)
        type_key = lambda x: (x[1][0], x[0])
        files = sorted(files, reverse=True, key=type_key)
        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                yield RevlogStoreEntry(
                    unencoded_path=u,
                    revlog_type=FILEFLAGS_CHANGELOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
            elif u.startswith(b'00manifest'):
                yield RevlogStoreEntry(
                    unencoded_path=u,
                    revlog_type=FILEFLAGS_MANIFESTLOG,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields (file_type, unencoded, size)

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        """return the list of store paths to copy for a non-stream clone"""
        return _data

    def write(self, tr):
        # no persistent state to flush in the base store
        pass

    def invalidatecaches(self):
        # no caches in the base store
        pass

    def markremoved(self, fn):
        # the base store does not track file names
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
671 704
672 705
class encodedstore(basicstore):
    """store variant where filenames are reversibly encoded on disk"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # all access goes through the full filename encoding
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """like basicstore.datafiles, decoding on-disk names on the way out"""
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                # the on-disk name is not a valid encoding
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            # expose the decoded name to callers
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        """return the filesystem path for store-relative path ``f``"""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        """return the list of paths to copy for a non-stream clone"""
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
712 745
713 746
class fncache:
    """in-memory view of the '.hg/store/fncache' file

    The fncache lists every tracked store file so the store can be walked
    without decoding on-disk names."""

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None  # None means "not loaded yet"
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks so a huge fncache is never fully buffered
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # trailing bytes without a final newline mean a truncated file
            # NOTE(review): message grammar ("does not ends") looks off, but
            # changing a user-visible string would affect test output
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """flush pending changes to disk within transaction ``tr``"""
        if self._dirty:
            # removals happened: rewrite the whole file
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """never record ``fn`` in the fncache (e.g. undo files)"""
        self._ignores.add(fn)

    def add(self, fn):
        """record ``fn`` as a pending addition (no-op if already present)"""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """forget ``fn``; silently ignores unknown names"""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
838 871
839 872
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records written revlog files into the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode  # store-path -> on-disk-path encoder

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only writes to revlog files under data/ or meta/ need recording
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
875 908
876 909
class fncachestore(basicstore):
    """store variant that tracks file names in an fncache file

    Walking relies on the fncache content instead of decoding on-disk
    names, which allows the non-reversible hashed path encoding."""

    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # writes through this vfs register new revlog files in the fncache
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """return the filesystem path for store-relative path ``f``"""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """return the on-disk size of the (encoded) store path"""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield revlog entries for every file recorded in the fncache"""
        for f in sorted(self.fncache):
            if not _matchtrackedpath(f, matcher):
                continue
            t = revlog_type(f)
            if t is None:
                # Note: this should not be in the fncache then…
                #
                # However the fncache might contains such file added by
                # previous version of Mercurial.
                continue
            yield RevlogStoreEntry(
                unencoded_path=f,
                revlog_type=FILEFLAGS_FILELOG,
                is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                is_volatile=bool(t & FILEFLAGS_VOLATILE),
            )

    def copylist(self):
        """return the list of paths to copy for a non-stream clone"""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        """flush pending fncache changes within transaction ``tr``"""
        self.fncache.write(tr)

    def invalidatecaches(self):
        """drop the in-memory fncache state (reloaded on next access)"""
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        """forget ``fn`` from the fncache"""
        self.fncache.remove(fn)

    def _exists(self, f):
        """check existence on disk, going through the path encoding"""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now