##// END OF EJS Templates
store: split the wrapping of encodedstore between _wrap and datafiles...
marmoute -
r51386:816e8bc6 default
parent child Browse files
Show More
@@ -1,1058 +1,1058
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.
7 7
import collections
import functools
import os
import re
import stat
from typing import Generator

from .i18n import _
from .pycompat import getattr
from .thirdparty import attr
from .node import hex
from . import (
    changelog,
    error,
    manifest,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from .utils import hashutil

# optional accelerated implementation of some of the encoders below
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
# use the implementation from the `parsers` module when it provides one
encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
def decodedir(path):
    """Reverse the directory escaping done by ``_encodedir``.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    # fast path: nothing was escaped
    if b".hg/" not in path:
        return path
    return (
        path.replace(b".d.hg/", b".d/")
        .replace(b".i.hg/", b".i/")
        .replace(b".hg.hg/", b".hg/")
    )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
def _buildencodefun():
    """Build the reversible filename encoder and its decoder.

    Returns an ``(encode, decode)`` pair of functions working on bytes.
    Reserved bytes (see ``_reserved``) become ``~xx``, uppercase letters
    and ``_`` become ``_`` followed by the lowercase letter.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # encoding map: identity first, then override the special cases
    cmap = {x: x for x in asciistr}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # decoding map is the inverse of the encoding map
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            # an encoded unit is 1 to 3 bytes long; try the shortest first
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no prefix at this position is a known encoded unit
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
168 168
169 169
# module level instances of the reversible encoder/decoder pair
_encodefname, _decodefname = _buildencodefun()
171 171
172 172
def encodefilename(s):
    """Apply the directory escaping, then the character level encoding.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    return _encodefname(encodedir(s))
179 179
180 180
def decodefilename(s):
    """Inverse of ``encodefilename``.

    Raises KeyError when ``s`` contains something the character level
    decoder does not know.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    return decodedir(_decodefname(s))
187 187
188 188
def _buildlowerencodefun():
    """Build the non reversible, lowercasing, filename encoder.

    Reserved bytes (see ``_reserved``) become ``~xx`` and uppercase letters
    are simply lowercased.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    xchr = pycompat.bytechr
    cmap = {xchr(x): xchr(x) for x in range(127)}
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    for x in range(ord(b"A"), ord(b"Z") + 1):
        cmap[xchr(x)] = xchr(x).lower()

    def lowerencode(s):
        return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212 212
213 213
# use the implementation from the `parsers` module when it provides one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
215 215
216 216 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
217 217 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
218 218 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
219 219
220 220
221 221 def _auxencode(path, dotencode):
222 222 """
223 223 Encodes filenames containing names reserved by Windows or which end in
224 224 period or space. Does not touch other single reserved characters c.
225 225 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
226 226 Additionally encodes space or period at the beginning, if dotencode is
227 227 True. Parameter path is assumed to be all lowercase.
228 228 A segment only needs encoding if a reserved name appears as a
229 229 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
230 230 doesn't need encoding.
231 231
232 232 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
233 233 >>> _auxencode(s.split(b'/'), True)
234 234 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
235 235 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
236 236 >>> _auxencode(s.split(b'/'), False)
237 237 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
238 238 >>> _auxencode([b'foo. '], True)
239 239 ['foo.~20']
240 240 >>> _auxencode([b' .foo'], True)
241 241 ['~20.foo']
242 242 """
243 243 for i, n in enumerate(path):
244 244 if not n:
245 245 continue
246 246 if dotencode and n[0] in b'. ':
247 247 n = b"~%02x" % ord(n[0:1]) + n[1:]
248 248 path[i] = n
249 249 else:
250 250 l = n.find(b'.')
251 251 if l == -1:
252 252 l = len(n)
253 253 if (l == 3 and n[:3] in _winres3) or (
254 254 l == 4
255 255 and n[3:4] <= b'9'
256 256 and n[3:4] >= b'1'
257 257 and n[:3] in _winres4
258 258 ):
259 259 # encode third letter ('aux' -> 'au~78')
260 260 ec = b"~%02x" % ord(n[2:3])
261 261 n = n[0:2] + ec + n[3:]
262 262 path[i] = n
263 263 if n[-1] in b'. ':
264 264 # encode last period or space ('foo...' -> 'foo..~2e')
265 265 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
266 266 return path
267 267
268 268
# longest store path produced by the reversible encoding
_maxstorepathlen = 120
# number of bytes kept from each directory level in a hashed path
_dirprefixlen = 8
# maximum cumulated length of the shortened directories in a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4


def _hashencode(path, dotencode):
    """Non reversible encoding used for paths that would be too long.

    ``path`` is expected to start with a 5 byte prefix (``data/`` or
    ``meta/``), which is dropped. The result is ``dh/`` followed by
    shortened directory names, as much of the basename as fits in
    ``_maxstorepathlen``, the sha1 hex digest of ``path`` and the original
    extension.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # use the remaining room for the beginning of the basename
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304 304
305 305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        res = _hashencode(path, dotencode)
    return res
343 343
344 344
def _pathencode(path):
    """Hybrid encoding of ``path`` with dot-encoding enabled.

    Pure Python counterpart of ``parsers.pathencode``.
    """
    de = encodedir(path)
    # the reversible encoding never shrinks a path, so an input that is
    # already too long can go straight to the hashed form
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split(b'/')
    res = b'/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res


# use the implementation from the `parsers` module when it provides one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
357 357
358 358
def _plainhybridencode(f):
    """Hybrid encoding of ``f`` without dot-encoding."""
    return _hybridencode(f, False)
361 361
362 362
363 363 def _calcmode(vfs):
364 364 try:
365 365 # files in .hg/ will be created using this mode
366 366 mode = vfs.stat().st_mode
367 367 # avoid some useless chmods
368 368 if (0o777 & ~util.umask) == (0o777 & mode):
369 369 mode = None
370 370 except OSError:
371 371 mode = None
372 372 return mode
373 373
374 374
# store level entries returned by basicstore.copylist()
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]

REVLOG_FILES_MAIN_EXT = (b'.i',)
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415 415
416 416
def is_revlog(f, kind, st):
    """Return the revlog type flags of directory entry ``f``, or None.

    Anything that is not a regular file (``kind`` is not ``stat.S_IFREG``)
    yields None; ``st`` is accepted but not used.
    """
    if kind != stat.S_IFREG:
        return None
    return revlog_type(f)
421 421
422 422
def revlog_type(f):
    """Classify filename ``f`` from its extension.

    Returns FILEFLAGS_REVLOG_MAIN for a main revlog file,
    FILETYPE_FILELOG_OTHER (plus FILEFLAGS_VOLATILE for volatile
    extensions) for a secondary revlog file, and None otherwise.
    """
    # XXX we need to filter `undo.` created by the transaction here, however
    # being naive about it also filter revlog for `undo.*` files, leading to
    # issue6542. So we no longer use EXCLUDED.
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    elif f.endswith(REVLOG_FILES_OTHER_EXT):
        t = FILETYPE_FILELOG_OTHER
        if f.endswith(REVLOG_FILES_VOLATILE_EXT):
            t |= FILEFLAGS_VOLATILE
        return t
    return None
435 435
436 436
# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
461 461
462 462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # path of the entry, relative to the store, before any encoding
    unencoded_path = attr.ib()
    # whether the file might change or vanish between listing and streaming
    _is_volatile = attr.ib(default=False)
    # size in bytes when already known, None otherwise
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of StoreFile (a single one) backing this entry"""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
491 491
492 492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    is_revlog = False
498 498
499 499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_CHANGELOG / _MANIFESTLOG / _FILELOG values
    revlog_type = attr.ib(default=None)
    # identifier of what the revlog tracks (b'' for the top level revlogs)
    target_id = attr.ib(default=None)
    # whether this entry is the main file of its revlog
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main

    def main_file_path(self):
        """unencoded path of the main revlog file"""
        return self.unencoded_path
530 530
531 531
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    # path of the file, relative to the store, before any encoding
    unencoded_path = attr.ib()
    # size in bytes when already known, None when it must be looked up.
    #
    # The default has to be None (not False): `file_size` only falls back
    # to a stat call when the cached value is None.
    _file_size = attr.ib(default=None)
    # whether the file might change or vanish between listing and streaming
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """return the size of the file in bytes

        The size given at creation time is used when there is one,
        otherwise the file is stat-ed through ``vfs``. A file that does not
        exist is reported as having a size of 0."""
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
547 547
548 548
549 549 def _gather_revlog(files_data):
550 550 """group files per revlog prefix
551 551
552 552 The returns a two level nested dict. The top level key is the revlog prefix
553 553 without extension, the second level is all the file "suffix" that were
554 554 seen for this revlog and arbitrary file data as value.
555 555 """
556 556 revlogs = collections.defaultdict(dict)
557 557 for u, value in files_data:
558 558 name, ext = _split_revlog_ext(u)
559 559 revlogs[name][ext] = value
560 560 return sorted(revlogs.items())
561 561
562 562
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension

    Returns a ``(prefix, suffix)`` pair. For the extensions listed in
    REVLOG_FILES_LONG_EXT the suffix starts at the last ``-`` of the name,
    otherwise it starts at the last ``.``."""
    if filename.endswith(REVLOG_FILES_LONG_EXT):
        char = b'-'
    else:
        char = b'.'
    idx = filename.rfind(char)
    return filename[:idx], filename[idx:]
571 571
572 572
573 573 def _ext_key(ext):
574 574 """a key to order revlog suffix
575 575
576 576 important to issue .i after other entry."""
577 577 # the only important part of this order is to keep the `.i` last.
578 578 if ext.endswith(b'.n'):
579 579 return (0, ext)
580 580 elif ext.endswith(b'.nd'):
581 581 return (10, ext)
582 582 elif ext.endswith(b'.d'):
583 583 return (20, ext)
584 584 elif ext.endswith(b'.i'):
585 585 return (50, ext)
586 586 else:
587 587 return (40, ext)
588 588
589 589
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # vfs without any filename encoding
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse, undecodable=None):
        """list the revlog related files found below ``relpath``

        Returns a sorted list of ``(unencoded, (revlog_type, size))``.

        Sub-directories are only visited when ``recurse`` is true.
        ``undecodable`` is not used by this implementation; it is part of
        the signature for the subclasses that decode filenames."""
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True, undecodable=undecodable)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    # drop the leading `data/` or `meta/`
                    revlog_target_id = revlog.split(b'/', 1)[1]
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        """yield the entries found at the top level of the store

        Entries that are not part of the changelog or the root manifest
        come first, then the manifest files, then the changelog files."""
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        # (keeping ordering so we get 00changelog.i last)
        key = lambda x: _ext_key(x[0])
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields BaseStoreEntry instances

        if a matcher is passed, it is handed over to `datafiles` so that
        only the storage files of the tracked paths it selects are yielded
        """
        # yield data files first
        yield from self.datafiles(matcher)
        yield from self.topfiles()

    def copylist(self):
        return _data

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
746 746
747 747
class encodedstore(basicstore):
    """store whose file names go through ``encodefilename``"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def _walk(self, relpath, recurse, undecodable=None):
        """same as basicstore._walk, with the file names decoded

        A name that cannot be decoded raises StorageError when
        ``undecodable`` is None; otherwise the name is appended to
        ``undecodable`` and the file is left out of the result."""
        old = super()._walk(relpath, recurse)
        new = []
        for f1, value in old:
            try:
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            new.append((f2, value))
        return new

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """same as basicstore.datafiles, restricted to what ``matcher``
        selects (see ``_matchtrackedpath``)"""
        entries = super(encodedstore, self).datafiles(undecodable=undecodable)
        for entry in entries:
            if _matchtrackedpath(entry.unencoded_path, matcher):
                yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
787 787
788 788
class fncache:
    """in-memory view of the ``fncache`` file listing the store files

    ``entries`` holds what was read from disk (None until loaded) and
    ``addls`` the additions that have not been written yet."""

    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self._ignores = set()
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        self.entries = set()
        # the file must be closed even when a corrupted content makes us
        # abort below
        try:
            chunk = b''
            for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
                chunk += c
                try:
                    p = chunk.rindex(b'\n')
                    self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                    chunk = chunk[p + 1 :]
                except ValueError:
                    # substring '\n' not found, maybe the entry is bigger
                    # than the chunksize, so let's keep iterating
                    pass

            if chunk:
                msg = _(b"fncache does not ends with a newline")
                if warn:
                    warn(msg + b'\n')
                else:
                    raise error.Abort(
                        msg,
                        hint=_(
                            b"use 'hg debugrebuildfncache' to "
                            b"rebuild the fncache"
                        ),
                    )
            self._checkentries(fp, warn)
        finally:
            fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """write the pending changes to disk within transaction ``tr``

        The whole file is rewritten when an entry was removed; when there
        are only additions they are appended to the existing file."""
        if self._dirty:
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            # force a reload on next access
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        """make later calls to `add` ignore ``fn``"""
        self._ignores.add(fn)

    def add(self, fn):
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        if self.entries is None:
            self._load()
        if fn in self.addls:
            # never written to disk, nothing to rewrite
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
913 913
914 914
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that encodes paths and records new revlog files in the
    fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
951 951
class fncachestore(basicstore):
    """store using the hybrid path encoding and tracking its data files in
    a ``fncache`` file"""

    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        """size in bytes of the (already encoded) ``path``"""
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield the revlog entries listed in the fncache

        Only the entries selected by ``matcher`` are yielded (see
        ``_matchtrackedpath``). ``undecodable`` is not used here. The
        entries carry no file size."""
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                if not _matchtrackedpath(f, matcher):
                    continue
                yield RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )

    def copylist(self):
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        """whether the file behind the unencoded path ``f`` is on disk"""
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now