##// END OF EJS Templates
store: declare a `files` method on BaseStoreEntry...
marmoute -
r51531:8fc10bfd default
parent child Browse files
Show More
@@ -1,1124 +1,1127 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 from typing import Generator
13 from typing import Generator, List
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 filelog,
23 23 manifest,
24 24 policy,
25 25 pycompat,
26 26 util,
27 27 vfs as vfsmod,
28 28 )
29 29 from .utils import hashutil
30 30
# C extension module providing fast path-encoding helpers; falls back to the
# pure-Python implementations below when individual attributes are missing.
parsers = policy.importmod('parsers')
# how much bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
35 35
36 36
37 37 def _match_tracked_entry(entry, matcher):
38 38 """parses a fncache entry and returns whether the entry is tracking a path
39 39 matched by matcher or not.
40 40
41 41 If matcher is None, returns True"""
42 42
43 43 if matcher is None:
44 44 return True
45 45 if entry.is_filelog:
46 46 return matcher(entry.target_id)
47 47 elif entry.is_manifestlog:
48 48 return matcher.visitdir(entry.target_id.rstrip(b'/'))
49 49 raise error.ProgrammingError(b"cannot process entry %r" % entry)
50 50
51 51
52 52 # This avoids a collision between a file named foo and a dir named
53 53 # foo.i or foo.d
54 54 def _encodedir(path):
55 55 """
56 56 >>> _encodedir(b'data/foo.i')
57 57 'data/foo.i'
58 58 >>> _encodedir(b'data/foo.i/bla.i')
59 59 'data/foo.i.hg/bla.i'
60 60 >>> _encodedir(b'data/foo.i.hg/bla.i')
61 61 'data/foo.i.hg.hg/bla.i'
62 62 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
63 63 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
64 64 """
65 65 return (
66 66 path.replace(b".hg/", b".hg.hg/")
67 67 .replace(b".i/", b".i.hg/")
68 68 .replace(b".d/", b".d.hg/")
69 69 )
70 70
71 71
# Prefer the C implementation from the parsers extension when available.
encodedir = getattr(parsers, 'encodedir', _encodedir)
73 73
74 74
def decodedir(path):
    """
    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # Fast path: nothing was mangled if no ".hg/" component is present.
    if b".hg/" not in path:
        return path
    # Inverse of _encodedir; replacement order matters.
    for mangled, plain in (
        (b".d.hg/", b".d/"),
        (b".i.hg/", b".i/"),
        (b".hg.hg/", b".hg/"),
    ):
        path = path.replace(mangled, plain)
    return path
91 91
92 92
93 93 def _reserved():
94 94 """characters that are problematic for filesystems
95 95
96 96 * ascii escapes (0..31)
97 97 * ascii hi (126..255)
98 98 * windows specials
99 99
100 100 these characters will be escaped by encodefunctions
101 101 """
102 102 winreserved = [ord(x) for x in u'\\:*?"<>|']
103 103 for x in range(32):
104 104 yield x
105 105 for x in range(126, 256):
106 106 yield x
107 107 for x in winreserved:
108 108 yield x
109 109
110 110
def _buildencodefun():
    """
    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    escape = b'_'
    tobyte = pycompat.bytechr

    # Start from the identity mapping for plain ASCII, then overlay the
    # escapes: reserved bytes become '~xx', uppercase (and '_' itself)
    # become '_' + lowercase.
    forward = {tobyte(i): tobyte(i) for i in range(127)}
    for code in _reserved():
        forward[tobyte(code)] = b"~%02x" % code
    for code in list(range(ord(b"A"), ord(b"Z") + 1)) + [ord(escape)]:
        forward[tobyte(code)] = escape + tobyte(code).lower()

    # The encoder is injective, so inverting the table is safe.
    backward = {v: k for k, v in forward.items()}

    def _decode(s):
        # greedy scan: tokens are 1 ('a'), 2 ('_a') or 3 ('~0a') bytes long
        pos = 0
        size = len(s)
        while pos < size:
            for width in (1, 2, 3):
                token = s[pos : pos + width]
                if token in backward:
                    yield backward[token]
                    pos += width
                    break
            else:
                raise KeyError

    def _encode(s):
        return b''.join([forward[s[i : i + 1]] for i in range(len(s))])

    return (_encode, lambda s: b''.join(_decode(s)))
167 167
168 168
# Module-level encode/decode pair shared by encodefilename/decodefilename.
_encodefname, _decodefname = _buildencodefun()
170 170
171 171
def encodefilename(s):
    """Encode a store path: directory mangling first, then byte escaping.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    mangled = encodedir(s)
    return _encodefname(mangled)
178 178
179 179
def decodefilename(s):
    """Decode a store path: undo byte escaping, then directory mangling.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    unescaped = _decodefname(s)
    return decodedir(unescaped)
186 186
187 187
def _buildlowerencodefun():
    """Build the non-reversible lowercasing byte encoder.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    tobyte = pycompat.bytechr
    # identity for plain ASCII, '~xx' for reserved bytes, lowercase for A-Z
    table = {tobyte(i): tobyte(i) for i in range(127)}
    for code in _reserved():
        table[tobyte(code)] = b"~%02x" % code
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[tobyte(code)] = tobyte(code).lower()

    def lowerencode(s):
        return b"".join([table[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
211 211
212 212
# Prefer the C implementation of lowerencode when the parsers extension
# provides one.
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
218 218
219 219
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # NOTE: `path` is a list of segments; it is mutated in place and also
    # returned.
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # escape a leading period or space when dotencode is requested
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # only the part before the first '.' can be a reserved name
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
266 266
267 267
# Maximum length of an encoded store path before falling back to hashing.
_maxstorepathlen = 120
# Number of leading characters kept per directory level in hashed encoding.
_dirprefixlen = 8
# Overall budget for the shortened directory prefix of a hashed path.
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
271 271
272 272
def _hashencode(path, dotencode):
    """Non-reversible hashed encoding for over-long store paths.

    Produces ``dh/<shortened dirs>/<basename filler><sha1 hex><ext>`` where
    the directory part is bounded by _maxshortdirslen and the whole result
    aims to fit in _maxstorepathlen (see _hybridencode's docstring).
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    # keep up to _dirprefixlen chars of each dir level while staying under
    # the _maxshortdirslen budget (the +1 accounts for the '/' separator)
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    # use any remaining space for a readable prefix of the basename
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
303 303
304 304
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    All paths that begin with 'data/' are encoded as follows.

    Default encoding (reversible): uppercase letters become '_' followed by
    the lowercase letter; reserved or illegal bytes become '~xx' hex escapes
    (see encodefilename); path components that are Windows reserved names
    get their third character escaped ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible): used when the default-encoded path
    would exceed _maxstorepathlen.  It keeps up to _dirprefixlen characters
    of every directory level (no more levels than fit in _maxshortdirslen),
    then as many characters of the basename as still fit, followed by the
    sha1 digest of the full path, preserving the extension ('.i'/'.d').
    The leading 'data/' is replaced by 'dh/' in that form.
    """
    mangled = encodedir(path)
    segments = _encodefname(mangled).split(b'/')
    candidate = b'/'.join(_auxencode(segments, dotencode))
    if len(candidate) <= _maxstorepathlen:
        return candidate
    return _hashencode(mangled, dotencode)
342 342
343 343
def _pathencode(path):
    """Default store path encoder: hybrid encoding with dotencode enabled.

    Falls back to the hashed encoding when either the raw path or the
    default-encoded result exceeds _maxstorepathlen.
    """
    mangled = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(mangled, True)
    segments = _encodefname(mangled).split(b'/')
    encoded = b'/'.join(_auxencode(segments, True))
    if len(encoded) > _maxstorepathlen:
        return _hashencode(mangled, True)
    return encoded


# Prefer the C implementation when the parsers extension provides one.
_pathencode = getattr(parsers, 'pathencode', _pathencode)
356 356
357 357
def _plainhybridencode(f):
    """Hybrid-encode `f` without the dotencode tweak."""
    return _hybridencode(f, False)
360 360
361 361
def _calcmode(vfs):
    """Return the creation mode for files under `vfs`, or None.

    None means the umask-derived default already matches, so chmod calls
    can be skipped.
    """
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
372 372
373 373
# Store files/directories included in a plain copy (see copylist()).
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

# extension of the main entry point file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the secondary files that may accompany a revlog
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
414 414
415 415
def is_revlog(f, kind, st):
    """Return the revlog type flags for `f`, or None for non-regular files.

    `st` is accepted for signature compatibility with readdir callbacks but
    is not consulted here.
    """
    if kind == stat.S_IFREG:
        return revlog_type(f)
    return None
420 420
421 421
def revlog_type(f):
    """Classify filename `f` as a revlog file, returning flag bits or None."""
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if not f.endswith(REVLOG_FILES_OTHER_EXT):
        return None
    flags = FILETYPE_FILELOG_OTHER
    if f.endswith(REVLOG_FILES_VOLATILE_EXT):
        flags |= FILEFLAGS_VOLATILE
    return flags
434 434
435 435
# High bits encode which logical store the file belongs to:
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# Low bits encode the role of the file within its revlog:
# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# Convenience combinations of the above flags.
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
460 460
461 461
@attr.s(slots=True)
class StoreFile:
    """a file matching a store entry"""

    # unencoded path of the file, relative to the store root
    unencoded_path = attr.ib()
    # lazily-computed size; None until first requested via file_size()
    _file_size = attr.ib(default=None)
    # True if the file may change or disappear between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return the file's size, stat-ing it through `vfs` at most once.

        A missing file is reported as size 0.
        """
        if self._file_size is None:
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                self._file_size = 0
        return self._file_size
477
478
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    def files(self) -> List[StoreFile]:
        """Return the StoreFile objects making up this entry.

        Subclasses must override this."""
        raise NotImplementedError
487
468 488
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False

    # unencoded path of the single file backing this entry
    _entry_path = attr.ib()
    # True if the file may change between listing and streaming
    _is_volatile = attr.ib(default=False)
    # known size, or None to stat lazily
    _file_size = attr.ib(default=None)
    # cached result of files()
    _files = attr.ib(default=None)

    def __init__(
        self,
        entry_path,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__()
        self._entry_path = entry_path
        self._is_volatile = is_volatile
        self._file_size = file_size
        self._files = None

    def files(self) -> List[StoreFile]:
        # a simple entry is backed by exactly one file; build and cache it
        if self._files is None:
            self._files = [
                StoreFile(
                    unencoded_path=self._entry_path,
                    file_size=self._file_size,
                    is_volatile=self._is_volatile,
                )
            ]
        return self._files
502 522
503 523
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True

    # FILEFLAGS_* bits identifying which revlog kind this is
    revlog_type = attr.ib(default=None)
    # what the revlog tracks: a file path, a manifest directory, or b''
    target_id = attr.ib(default=None)
    # unencoded path prefix shared by all files of this revlog
    _path_prefix = attr.ib(default=None)
    # mapping of extension -> StoreFile keyword arguments
    _details = attr.ib(default=None)
    # cached result of files()
    _files = attr.ib(default=None)

    def __init__(
        self,
        revlog_type,
        path_prefix,
        target_id,
        details,
    ):
        super().__init__()
        self.revlog_type = revlog_type
        self.target_id = target_id
        self._path_prefix = path_prefix
        # every revlog must at least have its `.i` entry point
        assert b'.i' in details, (path_prefix, details)
        self._details = details
        self._files = None

    @property
    def is_changelog(self):
        # NOTE: returns the masked flag bits (truthy int), not a bool
        return self.revlog_type & FILEFLAGS_CHANGELOG

    @property
    def is_manifestlog(self):
        # NOTE: returns the masked flag bits (truthy int), not a bool
        return self.revlog_type & FILEFLAGS_MANIFESTLOG

    @property
    def is_filelog(self):
        # NOTE: returns the masked flag bits (truthy int), not a bool
        return self.revlog_type & FILEFLAGS_FILELOG

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self._path_prefix + b'.i'

    def files(self) -> List[StoreFile]:
        # build one StoreFile per known extension; _ext_key keeps `.i` last
        if self._files is None:
            self._files = []
            for ext in sorted(self._details, key=_ext_key):
                path = self._path_prefix + ext
                data = self._details[ext]
                self._files.append(StoreFile(unencoded_path=path, **data))
        return self._files

    def get_revlog_instance(self, repo):
        """Obtain a revlog instance from this store entry

        An instance of the appropriate class is returned.
        """
        if self.is_changelog:
            return changelog.changelog(repo.svfs)
        elif self.is_manifestlog:
            mandir = self.target_id
            return manifest.manifestrevlog(
                repo.nodeconstants, repo.svfs, tree=mandir
            )
        else:
            return filelog.filelog(repo.svfs, self.target_id)
570 590
571 591
# NOTE(review): this is a duplicate of the StoreFile class defined earlier in
# this file (only the docstring wording differs) — confirm and drop one copy.
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # unencoded path of the file, relative to the store root
    unencoded_path = attr.ib()
    # lazily-computed size; None until first requested via file_size()
    _file_size = attr.ib(default=None)
    # True if the file may change or disappear between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return the file's size, stat-ing it through `vfs` at most once.

        A missing file is reported as size 0.
        """
        if self._file_size is None:
            try:
                self._file_size = vfs.stat(self.unencoded_path).st_size
            except FileNotFoundError:
                self._file_size = 0
        return self._file_size
587
588
def _gather_revlog(files_data):
    """group files per revlog prefix

    The returns a two level nested dict. The top level key is the revlog prefix
    without extension, the second level is all the file "suffix" that were
    seen for this revlog and arbitrary file data as value.
    """
    grouped = collections.defaultdict(dict)
    for unencoded, value in files_data:
        prefix, suffix = _split_revlog_ext(unencoded)
        grouped[prefix][suffix] = value
    return sorted(grouped.items())
601 604
602 605
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-hash extensions are separated by '-', plain ones by '.'
    sep = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    cut = filename.rfind(sep)
    return filename[:cut], filename[cut:]
611 614
612 615
613 616 def _ext_key(ext):
614 617 """a key to order revlog suffix
615 618
616 619 important to issue .i after other entry."""
617 620 # the only important part of this order is to keep the `.i` last.
618 621 if ext.endswith(b'.n'):
619 622 return (0, ext)
620 623 elif ext.endswith(b'.nd'):
621 624 return (10, ext)
622 625 elif ext.endswith(b'.d'):
623 626 return (20, ext)
624 627 elif ext.endswith(b'.i'):
625 628 return (50, ext)
626 629 else:
627 630 return (40, ext)
628 631
629 632
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # only directory names need mangling for this store layout
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        """Return the filesystem path of store file `f`."""
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        '''yields (revlog_type, unencoded, size)'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            # iterative depth-first traversal collecting revlog files only
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        """Instantiate the repository changelog on this store's vfs."""
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        """Instantiate the manifest log on this store's vfs."""
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        # (directory, revlog kind flag, strip trailing filename for target id)
        dirs = [
            (b'data', FILEFLAGS_FILELOG, False),
            (b'meta', FILEFLAGS_MANIFESTLOG, True),
        ]
        for base_dir, rl_type, strip_filename in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                file_details = {}
                # drop the leading 'data/' or 'meta/' component
                revlog_target_id = revlog.split(b'/', 1)[1]
                if strip_filename and b'/' in revlog:
                    # manifest revlogs track a directory, not a file
                    revlog_target_id = revlog_target_id.rsplit(b'/', 1)[0]
                    revlog_target_id += b'/'
                for ext, (t, s) in sorted(details.items()):
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    details=file_details,
                )

    def top_entries(
        self, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield entries living at the top of the store.

        Optionally includes phaseroots and obsstore, then the changelog and
        root-manifest revlogs plus any other top-level revlog files."""
        if phase and self.vfs.exists(b'phaseroots'):
            yield SimpleStoreEntry(
                entry_path=b'phaseroots',
                is_volatile=True,
            )

        if obsolescence and self.vfs.exists(b'obsstore'):
            # XXX if we had the file size it could be non-volatile
            yield SimpleStoreEntry(
                entry_path=b'obsstore',
                is_volatile=True,
            )

        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    entry_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                file_details = {}
                for ext, (t, s) in details.items():
                    file_details[ext] = {
                        'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                        'file_size': s,
                    }
                yield RevlogStoreEntry(
                    path_prefix=revlog,
                    revlog_type=revlog_type,
                    target_id=b'',
                    details=file_details,
                )

    def walk(
        self, matcher=None, phase=False, obsolescence=False
    ) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields instance from BaseStoreEntry subclasses

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.data_entries(matcher):
            yield x
        for x in self.top_entries(phase=phase, obsolescence=obsolescence):
            yield x

    def copylist(self):
        """List of store paths to copy when cloning (see _data)."""
        return _data

    def write(self, tr):
        # nothing persistent to flush for the basic store
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
808 811
809 812
class encodedstore(basicstore):
    """Store variant whose on-disk filenames are fully encoded.

    Lives under '.hg/store' and uses encodefilename/decodefilename instead
    of the directory-only mangling of basicstore."""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        # decode the on-disk names back to logical store paths
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Same as basicstore.data_entries but filtered by `matcher`."""
        entries = super(encodedstore, self).data_entries(
            undecodable=undecodable
        )
        for entry in entries:
            if _match_tracked_entry(entry, matcher):
                yield entry

    def join(self, f):
        """Return the filesystem path for store file `f` (fully encoded)."""
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
851 854
852 855
class fncache:
    """Cache of the store file names, persisted in the 'fncache' file.

    `entries` is None until lazily loaded; `addls` accumulates additions so
    that pure appends can avoid reading the whole file."""

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        chunk = b''
        # read in bounded chunks so huge fncache files don't blow up memory
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks its final newline
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """Persist pending changes through transaction `tr`.

        A full rewrite happens when entries were removed; otherwise new
        entries are simply appended."""
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access since entries is now stale
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """Mark `fn` so that later add() calls for it are ignored."""
        self._ignores.add(fn)

    def add(self, fn):
        """Record `fn` as a store file (no-op if ignored or already known)."""
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        """Forget `fn`; removing a persisted entry marks the cache dirty."""
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
977 980
978 981
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records revlog file creation in the fncache.

    Paths are encoded with `encode` before reaching the underlying vfs."""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only writes to revlog files under data/ or meta/ need registering
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
1014 1017
1015 1018
class fncachestore(basicstore):
    """A store variant that keeps the list of tracked files in `fncache`.

    Revlog paths are run through a (possibly dotencoded) filesystem-safe
    encoding, and the set of logical paths is persisted in the ``fncache``
    file so the store can be enumerated without walking the encoded tree.
    """

    def __init__(self, path, vfstype, dotencode):
        # `dotencode` selects the encoding flavor used for on-disk paths
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs bypasses both path encoding and fncache registration
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # all regular store access goes through the fncache-recording proxy
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        """Return the absolute on-disk (encoded) path for store path `f`."""
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        # `path` is already encoded; stat through the raw vfs
        return self.rawvfs.stat(path).st_size

    def data_entries(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Yield one RevlogStoreEntry per revlog tracked by the fncache,
        optionally restricted to paths accepted by `matcher`.

        `undecodable` is part of the basicstore signature; it is unused
        here since fncache entries are stored in decoded form.
        """
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        # group the per-extension files (.i/.d/...) of each revlog together
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            file_details = {}
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in details.items():
                file_details[ext] = {
                    'is_volatile': bool(t & FILEFLAGS_VOLATILE),
                }
            entry = RevlogStoreEntry(
                path_prefix=revlog,
                revlog_type=rl_type,
                target_id=revlog_target_id,
                details=file_details,
            )
            if _match_tracked_entry(entry, matcher):
                yield entry

    def copylist(self):
        """Return the store-relative file/directory names to copy when
        cloning a repository that uses this store layout."""
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        # persist pending fncache changes within transaction `tr`
        self.fncache.write(tr)

    def invalidatecaches(self):
        # drop the in-memory fncache state; it will be lazily reloaded
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        """Return whether decoded store path `f` exists on disk."""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now