##// END OF EJS Templates
store: add a `target_id` attribute on RevlogStoreEntry...
marmoute -
r51376:60e613f6 default
parent child Browse files
Show More
@@ -1,1043 +1,1054
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 import collections
9 9 import functools
10 10 import os
11 11 import re
12 12 import stat
13 13 from typing import Generator
14 14
15 15 from .i18n import _
16 16 from .pycompat import getattr
17 17 from .thirdparty import attr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
# C implementation of the encoding helpers, when available
parsers = policy.importmod('parsers')
# how many bytes should be read from fncache in one read
# It is done to prevent loading large fncache files into memory
fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
def decodedir(path):
    """Reverse the directory encoding performed by ``_encodedir``.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    """
    if b".hg/" not in path:
        # fast path: no escaped directory name in this path
        return path
    decoded = path.replace(b".d.hg/", b".d/")
    decoded = decoded.replace(b".i.hg/", b".i/")
    return decoded.replace(b".hg.hg/", b".hg/")
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by encodefunctions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
def _buildencodefun():
    """Build the (encode, decode) function pair for the plain name encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    """
    e = b'_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord(b"A"), ord(b"Z") + 1))

    # start from the identity mapping for 7-bit characters...
    cmap = {x: x for x in asciistr}
    # ...reserved characters become "~XX" hex escapes...
    for x in _reserved():
        cmap[xchr(x)] = b"~%02x" % x
    # ...and upper-case letters (plus '_' itself) become '_' + lower-case
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse table used by the decoder; every encoded token is 1-3 bytes
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k

    def decode(s):
        # greedily consume 1, 2 or 3 byte escape sequences from `s`
        i = 0
        while i < len(s):
            for l in range(1, 4):
                try:
                    yield dmap[s[i : i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no known token starts at offset `i`: corrupted input
                raise KeyError

    return (
        lambda s: b''.join([cmap[s[c : c + 1]] for c in range(len(s))]),
        lambda s: b''.join(list(decode(s))),
    )
168 168
169 169
170 170 _encodefname, _decodefname = _buildencodefun()
171 171
172 172
def encodefilename(s):
    """Apply the directory encoding, then the per-character encoding.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    """
    dir_safe = encodedir(s)
    return _encodefname(dir_safe)
179 179
180 180
def decodefilename(s):
    """Undo the per-character encoding, then the directory encoding.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    """
    name_decoded = _decodefname(s)
    return decodedir(name_decoded)
187 187
188 188
def _buildlowerencodefun():
    """Build a function lower-casing and hex-escaping a byte string.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    """
    bytechr = pycompat.bytechr
    # identity mapping for plain 7-bit characters...
    table = {bytechr(code): bytechr(code) for code in range(127)}
    # ...reserved characters become "~XX" hex escapes...
    for code in _reserved():
        table[bytechr(code)] = b"~%02x" % code
    # ...and upper-case letters are folded to lower case
    for code in range(ord(b"A"), ord(b"Z") + 1):
        table[bytechr(code)] = bytechr(code).lower()

    def lowerencode(s):
        return b"".join([table[c] for c in pycompat.iterbytestr(s)])

    return lowerencode
212 212
213 213
# prefer the C implementation of lowerencode when the parsers module has one
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()

# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = (b'aux', b'con', b'prn', b'nul')  # length 3
_winres4 = (b'com', b'lpt')  # length 4 (with trailing 1..9)
219 219
220 220
def _auxencode(path, dotencode):
    """
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    """
    # note: `path` is a list of segments and is modified in place (and
    # returned); `n[0] in b'. '` tests the first byte's value
    for i, n in enumerate(path):
        if not n:
            continue
        if dotencode and n[0] in b'. ':
            # escape a leading period or space
            n = b"~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # `l` is the length of the part before the first dot, i.e. the
            # only part that could be a Windows reserved name
            l = n.find(b'.')
            if l == -1:
                l = len(n)
            if (l == 3 and n[:3] in _winres3) or (
                l == 4
                and n[3:4] <= b'9'
                and n[3:4] >= b'1'
                and n[:3] in _winres4
            ):
                # encode third letter ('aux' -> 'au~78')
                ec = b"~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in b'. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
    return path
267 267
268 268
# longest path an encoded store file may have before _hashencode kicks in
_maxstorepathlen = 120
# how many characters of each directory level the hashed encoding keeps
_dirprefixlen = 8
# overall length budget for the shortened directory levels of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
272 272
273 273
def _hashencode(path, dotencode):
    """Return the non-reversible hashed ('dh/') form of `path`.

    Used when the reversibly-encoded path would exceed _maxstorepathlen;
    see _hybridencode for a full description of the scheme.
    """
    digest = hex(hashutil.sha1(path).digest())
    le = lowerencode(path[5:]).split(b'/')  # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        # keep at most _dirprefixlen characters of each directory level
        d = p[:_dirprefixlen]
        if d[-1] in b'. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + b'_'
        if sdirslen == 0:
            t = len(d)
        else:
            t = sdirslen + 1 + len(d)
        # stop once the shortened directories no longer fit the budget
        if t > _maxshortdirslen:
            break
        sdirs.append(d)
        sdirslen = t
    dirs = b'/'.join(sdirs)
    if len(dirs) > 0:
        dirs += b'/'
    res = b'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with the start of the basename, up to _maxstorepathlen total
        filler = basename[:spaceleft]
        res = b'dh/' + dirs + filler + digest + ext
    return res
304 304
305 305
def _hybridencode(path, dotencode):
    """encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    """
    path = encodedir(path)
    ef = _encodefname(path).split(b'/')
    res = b'/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long: fall back to the non-reversible hashed encoding
        res = _hashencode(path, dotencode)
    return res
343 343
344 344
def _pathencode(path):
    """Encode a store path, hashing it when the encoded form is too long.

    Pure-Python fallback for the C `pathencode`; always dot-encodes.
    """
    dir_safe = encodedir(path)
    if len(path) > _maxstorepathlen:
        # the raw path is already over budget: hash without trying more
        return _hashencode(dir_safe, True)
    segments = _encodefname(dir_safe).split(b'/')
    candidate = b'/'.join(_auxencode(segments, True))
    if len(candidate) > _maxstorepathlen:
        return _hashencode(dir_safe, True)
    return candidate
354 354
355 355
# use the C implementation of pathencode when available
_pathencode = getattr(parsers, 'pathencode', _pathencode)


def _plainhybridencode(f):
    # hybrid encoding with dot-encoding disabled (leading '.'/' ' kept)
    return _hybridencode(f, False)
361 361
362 362
def _calcmode(vfs):
    """Compute the permission mode for new files under `.hg/`.

    Returns None when no explicit chmod is needed because the directory's
    mode already matches what the current umask would produce.
    """
    try:
        # files in .hg/ will be created using this mode
        dir_mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & dir_mode):
        return None
    return dir_mode
373 373
374 374
# store files and directories (relative to the repository root) that are
# copied on clone; used by `basicstore.copylist`
_data = [
    b'bookmarks',
    b'narrowspec',
    b'data',
    b'meta',
    b'00manifest.d',
    b'00manifest.i',
    b'00changelog.d',
    b'00changelog.i',
    b'phaseroots',
    b'obsstore',
    b'requires',
]
388 388
# extension of the main (index) file of a revlog
REVLOG_FILES_MAIN_EXT = (b'.i',)
# extensions of the secondary files that may compose a revlog
REVLOG_FILES_OTHER_EXT = (
    b'.idx',
    b'.d',
    b'.dat',
    b'.n',
    b'.nd',
    b'.sda',
)
# file extension that also use a `-SOMELONGIDHASH.ext` form
REVLOG_FILES_LONG_EXT = (
    b'.nd',
    b'.idx',
    b'.dat',
    b'.sda',
)
# files that are "volatile" and might change between listing and streaming
#
# note: the ".nd" file are nodemap data and won't "change" but they might be
# deleted.
REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')

# some exception to the above matching
#
# XXX This is currently not in use because of issue6542
EXCLUDED = re.compile(br'.*undo\.[^/]+\.(nd?|i)$')
415 415
416 416
def is_revlog(f, kind, st):
    """Return the revlog type flags for file `f`, or None for non-revlogs.

    `kind` is the stat type of the directory entry; anything that is not a
    regular file cannot be a revlog.
    """
    return revlog_type(f) if kind == stat.S_IFREG else None
421 421
422 422
def revlog_type(f):
    """Classify file name `f`, returning FILEFLAGS_* bits or None.

    XXX we need to filter `undo.` created by the transaction here, however
    being naive about it also filter revlog for `undo.*` files, leading to
    issue6542. So we no longer use EXCLUDED.
    """
    if f.endswith(REVLOG_FILES_MAIN_EXT):
        return FILEFLAGS_REVLOG_MAIN
    if not f.endswith(REVLOG_FILES_OTHER_EXT):
        return None
    flags = FILETYPE_FILELOG_OTHER
    if f.endswith(REVLOG_FILES_VOLATILE_EXT):
        flags |= FILEFLAGS_VOLATILE
    return flags
435 435
436 436
# bit flags describing what a store file is; high bits give the owning
# revlog category, low bits the role of the file within that revlog

# the file is part of changelog data
FILEFLAGS_CHANGELOG = 1 << 13
# the file is part of manifest data
FILEFLAGS_MANIFESTLOG = 1 << 12
# the file is part of filelog data
FILEFLAGS_FILELOG = 1 << 11
# file that are not directly part of a revlog
FILEFLAGS_OTHER = 1 << 10

# the main entry point for a revlog
FILEFLAGS_REVLOG_MAIN = 1 << 1
# a secondary file for a revlog
FILEFLAGS_REVLOG_OTHER = 1 << 0

# files that are "volatile" and might change between listing and streaming
FILEFLAGS_VOLATILE = 1 << 20

# convenient combinations of the above
FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
FILETYPE_OTHER = FILEFLAGS_OTHER
461 461
462 462
@attr.s(slots=True, init=False)
class BaseStoreEntry:
    """An entry in the store

    This is returned by `store.walk` and represent some data in the store."""

    # repository-relative path, before filename encoding
    unencoded_path = attr.ib()
    # True when the file may change or disappear between listing and reading
    _is_volatile = attr.ib(default=False)
    # size in bytes when known, or None to stat() on demand
    _file_size = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        is_volatile=False,
        file_size=None,
    ):
        self.unencoded_path = unencoded_path
        self._is_volatile = is_volatile
        self._file_size = file_size

    def files(self):
        """return the list of StoreFile backing this entry"""
        return [
            StoreFile(
                unencoded_path=self.unencoded_path,
                file_size=self._file_size,
                is_volatile=self._is_volatile,
            )
        ]
491 491
492 492
@attr.s(slots=True, init=False)
class SimpleStoreEntry(BaseStoreEntry):
    """A generic entry in the store"""

    # not backed by a revlog (e.g. phaseroots, obsstore, requires)
    is_revlog = False
498 498
499 499
@attr.s(slots=True, init=False)
class RevlogStoreEntry(BaseStoreEntry):
    """A revlog entry in the store"""

    is_revlog = True
    # one of the FILEFLAGS_* revlog categories
    revlog_type = attr.ib(default=None)
    # identifies the revlog's target: in this module it is set to the
    # tracked path below "data/" for filelogs, the directory (with a
    # trailing '/') for directory manifests, and b'' for the top-level
    # changelog/manifest (see the datafiles/topfiles implementations)
    target_id = attr.ib(default=None)
    # True for the main (".i") file of the revlog, False for secondary files
    is_revlog_main = attr.ib(default=None)

    def __init__(
        self,
        unencoded_path,
        revlog_type,
        target_id,
        is_revlog_main=False,
        is_volatile=False,
        file_size=None,
    ):
        super().__init__(
            unencoded_path=unencoded_path,
            is_volatile=is_volatile,
            file_size=file_size,
        )
        self.revlog_type = revlog_type
        self.target_id = target_id
        self.is_revlog_main = is_revlog_main
523 526
524 527
@attr.s(slots=True)
class StoreFile:
    """a file matching an entry"""

    unencoded_path = attr.ib()
    # size in bytes when known, or None to stat() the file lazily.
    # The previous default of False slipped past the `is not None` guard in
    # file_size() below, making a StoreFile built without an explicit size
    # report `False` instead of its real size; None is the value the rest of
    # this module (BaseStoreEntry.files) actually passes for "unknown".
    _file_size = attr.ib(default=None)
    is_volatile = attr.ib(default=False)

    def file_size(self, vfs):
        """Return the file size, statting through `vfs` when unknown.

        Missing files are reported as size 0: volatile files (e.g. nodemap
        data) may legitimately disappear between listing and use.
        """
        if self._file_size is not None:
            return self._file_size
        try:
            return vfs.stat(self.unencoded_path).st_size
        except FileNotFoundError:
            return 0
540 543
541 544
def _gather_revlog(files_data):
    """group files per revlog prefix

    Returns a sorted list of (prefix, suffix-dict) pairs: the first item is
    the revlog prefix without extension, the second maps every file
    "suffix" seen for that revlog to the arbitrary per-file data supplied
    by the caller.
    """
    grouped = collections.defaultdict(dict)
    for unencoded_name, file_data in files_data:
        prefix, suffix = _split_revlog_ext(unencoded_name)
        grouped[prefix][suffix] = file_data
    return sorted(grouped.items())
554 557
555 558
def _split_revlog_ext(filename):
    """split the revlog file prefix from the variable extension"""
    # long-id files look like `prefix-SOMELONGIDHASH.ext`; split on the
    # dash instead of the dot so the hash stays with the extension part
    separator = b'-' if filename.endswith(REVLOG_FILES_LONG_EXT) else b'.'
    pos = filename.rfind(separator)
    return filename[:pos], filename[pos:]
564 567
565 568
566 569 def _ext_key(ext):
567 570 """a key to order revlog suffix
568 571
569 572 important to issue .i after other entry."""
570 573 # the only important part of this order is to keep the `.i` last.
571 574 if ext.endswith(b'.n'):
572 575 return (0, ext)
573 576 elif ext.endswith(b'.nd'):
574 577 return (10, ext)
575 578 elif ext.endswith(b'.d'):
576 579 return (20, ext)
577 580 elif ext.endswith(b'.i'):
578 581 return (50, ext)
579 582 else:
580 583 return (40, ext)
581 584
582 585
class basicstore:
    '''base class for local repository stores'''

    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        # self.vfs transparently applies the directory encoding, so callers
        # can use plain repository-relative names
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        # absolute filesystem path of `f` inside the store
        return self.path + b'/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded_name, (revlog_type, size))
        for the revlog files found below `relpath`'''
        path = self.path
        if relpath:
            path += b'/' + relpath
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + b'/' + f
                    rl_type = is_revlog(f, kind, st)
                    if rl_type is not None:
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), (rl_type, st.st_size)))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)

        l.sort()
        return l

    def changelog(self, trypending, concurrencychecker=None):
        # instantiate the changelog stored at the store root
        return changelog.changelog(
            self.vfs,
            trypending=trypending,
            concurrencychecker=concurrencychecker,
        )

    def manifestlog(self, repo, storenarrowmatch):
        # instantiate the manifest log rooted at `00manifest`
        rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
        return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like walk, but excluding the changelog and root manifest.

        When [undecodable] is None, revlogs names that can't be
        decoded cause an exception. When it is provided, it should
        be a list and the filenames that can't be decoded are added
        to it instead. This is very rarely needed."""
        dirs = [
            (b'data', FILEFLAGS_FILELOG),
            (b'meta', FILEFLAGS_MANIFESTLOG),
        ]
        for base_dir, rl_type in dirs:
            files = self._walk(base_dir, True)
            files = (f for f in files if f[1][0] is not None)
            for revlog, details in _gather_revlog(files):
                for ext, (t, s) in sorted(details.items()):
                    u = revlog + ext
                    # target_id is the name below the "data/" or "meta/"
                    # directory
                    revlog_target_id = revlog.split(b'/', 1)[1]
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=rl_type,
                        target_id=revlog_target_id,
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def topfiles(self) -> Generator[BaseStoreEntry, None, None]:
        # yield entries for the files sitting directly in the store root
        files = reversed(self._walk(b'', False))

        changelogs = collections.defaultdict(dict)
        manifestlogs = collections.defaultdict(dict)

        for u, (t, s) in files:
            if u.startswith(b'00changelog'):
                name, ext = _split_revlog_ext(u)
                changelogs[name][ext] = (t, s)
            elif u.startswith(b'00manifest'):
                name, ext = _split_revlog_ext(u)
                manifestlogs[name][ext] = (t, s)
            else:
                yield SimpleStoreEntry(
                    unencoded_path=u,
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                    file_size=s,
                )
        # yield manifest before changelog
        top_rl = [
            (manifestlogs, FILEFLAGS_MANIFESTLOG),
            (changelogs, FILEFLAGS_CHANGELOG),
        ]
        assert len(manifestlogs) <= 1
        assert len(changelogs) <= 1
        for data, revlog_type in top_rl:
            for revlog, details in sorted(data.items()):
                # (keeping ordering so we get 00changelog.i last)
                key = lambda x: _ext_key(x[0])
                for ext, (t, s) in sorted(details.items(), key=key):
                    u = revlog + ext
                    yield RevlogStoreEntry(
                        unencoded_path=u,
                        revlog_type=revlog_type,
                        target_id=b'',
                        is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                        is_volatile=bool(t & FILEFLAGS_VOLATILE),
                        file_size=s,
                    )

    def walk(self, matcher=None) -> Generator[BaseStoreEntry, None, None]:
        """return files related to data storage (ie: revlogs)

        yields BaseStoreEntry objects

        if a matcher is passed, storage files of only those tracked paths
        are passed with matches the matcher
        """
        # yield data files first
        for x in self.datafiles(matcher):
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        # names (relative to the repo root) that a clone has to copy
        return _data

    def write(self, tr):
        # the base store has no pending state to flush
        pass

    def invalidatecaches(self):
        # the base store keeps no caches
        pass

    def markremoved(self, fn):
        # the base store does not track individual file names
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # file?
        if self.vfs.exists(path + b".i"):
            return True
        # dir?
        if not path.endswith(b"/"):
            path = path + b"/"
        return self.vfs.exists(path)
736 742
737 743
class encodedstore(basicstore):
    """store flavor that filename-encodes everything below `.hg/store`"""

    def __init__(self, path, vfstype):
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    # note: topfiles would also need a decode phase. It is just that in
    # practice we do not have any file outside of `data/` that needs encoding.
    # However that might change so we should probably add a test and encoding
    # decoding for it too. see issue6548

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """Like basicstore.datafiles, but decode the on-disk names back to
        repository-relative paths before matching and yielding them."""
        for entry in super(encodedstore, self).datafiles():
            try:
                f1 = entry.unencoded_path
                f2 = decodefilename(f1)
            except KeyError:
                if undecodable is None:
                    msg = _(b'undecodable revlog name %s') % f1
                    raise error.StorageError(msg)
                else:
                    undecodable.append(f1)
                    continue
            if not _matchtrackedpath(f2, matcher):
                continue
            entry.unencoded_path = f2
            yield entry

    def join(self, f):
        return self.path + b'/' + encodefilename(f)

    def copylist(self):
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
777 783
778 784
class fncache:
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # names for which add() must be a no-op
        self._ignores = set()
        # None until the on-disk file has been loaded
        self.entries = None
        self._dirty = False
        # set of new additions to fncache
        self.addls = set()

    def ensureloaded(self, warn=None):
        """read the fncache file if not already read.

        If the file on disk is corrupted, raise. If warn is provided,
        warn and keep going instead."""
        if self.entries is None:
            self._load(warn)

    def _load(self, warn=None):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs(b'fncache', mode=b'rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return

        # read in bounded chunks (fncache_chunksize) and split on complete
        # lines only, so huge fncache files are never fully buffered
        self.entries = set()
        chunk = b''
        for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
            chunk += c
            try:
                p = chunk.rindex(b'\n')
                self.entries.update(decodedir(chunk[: p + 1]).splitlines())
                chunk = chunk[p + 1 :]
            except ValueError:
                # substring '\n' not found, maybe the entry is bigger than the
                # chunksize, so let's keep iterating
                pass

        if chunk:
            # leftover bytes mean the file lacks a final newline
            msg = _(b"fncache does not ends with a newline")
            if warn:
                warn(msg + b'\n')
            else:
                raise error.Abort(
                    msg,
                    hint=_(
                        b"use 'hg debugrebuildfncache' to "
                        b"rebuild the fncache"
                    ),
                )
        self._checkentries(fp, warn)
        fp.close()

    def _checkentries(self, fp, warn):
        """make sure there is no empty string in entries"""
        if b'' in self.entries:
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip(b'\n'):
                    t = _(b'invalid entry in fncache, line %d') % (n + 1)
                    if warn:
                        warn(t + b'\n')
                    else:
                        raise error.Abort(t)

    def write(self, tr):
        """flush pending additions/removals within transaction `tr`"""
        if self._dirty:
            # removals happened: rewrite the whole file
            assert self.entries is not None
            self.entries = self.entries | self.addls
            self.addls = set()
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
            fp.close()
            self._dirty = False
        if self.addls:
            # if we have just new entries, let's append them to the fncache
            tr.addbackup(b'fncache')
            fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
            if self.addls:
                fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
            fp.close()
            self.entries = None
            self.addls = set()

    def addignore(self, fn):
        # make subsequent add() calls ignore `fn`
        self._ignores.add(fn)

    def add(self, fn):
        # record `fn` for addition unless already known or ignored
        if fn in self._ignores:
            return
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self.addls.add(fn)

    def remove(self, fn):
        # forget `fn`; missing names are silently ignored
        if self.entries is None:
            self._load()
        if fn in self.addls:
            self.addls.remove(fn)
            return
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if fn in self.addls:
            return True
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries | self.addls)
903 909
904 910
class _fncachevfs(vfsmod.proxyvfs):
    """vfs proxy that records newly written revlog files into the fncache"""

    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode=b'r', *args, **kw):
        encoded = self.encode(path)
        # only writes to revlog files under data/ or meta/ need recording
        if (
            mode not in (b'r', b'rb')
            and (path.startswith(b'data/') or path.startswith(b'meta/'))
            and revlog_type(path) is not None
        ):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            notload = self.fncache.entries is None and self.vfs.exists(encoded)
            if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                notload = False
            if not notload:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)

    def register_file(self, path):
        """generic hook point to lets fncache steer its stew"""
        if path.startswith(b'data/') or path.startswith(b'meta/'):
            self.fncache.add(path)
940 946
941 947
class fncachestore(basicstore):
    """store flavor that tracks its file names in an fncache file"""

    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + b'/store')
        self.path = vfs.base
        self.pathsep = self.path + b'/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        # writes through self.vfs register new revlog files in the fncache
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        # size of the *encoded* path on disk
        return self.rawvfs.stat(path).st_size

    def datafiles(
        self, matcher=None, undecodable=None
    ) -> Generator[BaseStoreEntry, None, None]:
        """yield store entries for the revlogs listed in the fncache"""
        files = ((f, revlog_type(f)) for f in self.fncache)
        # Note: all files in fncache should be revlog related, However the
        # fncache might contains such file added by previous version of
        # Mercurial.
        files = (f for f in files if f[1] is not None)
        by_revlog = _gather_revlog(files)
        for revlog, details in by_revlog:
            if revlog.startswith(b'data/'):
                rl_type = FILEFLAGS_FILELOG
                # target_id is the tracked path below "data/"
                revlog_target_id = revlog.split(b'/', 1)[1]
            elif revlog.startswith(b'meta/'):
                rl_type = FILEFLAGS_MANIFESTLOG
                # drop the initial directory and the `00manifest` file part
                tmp = revlog.split(b'/', 1)[1]
                revlog_target_id = tmp.rsplit(b'/', 1)[0] + b'/'
            else:
                # unreachable
                assert False, revlog
            for ext, t in sorted(details.items()):
                f = revlog + ext
                if not _matchtrackedpath(f, matcher):
                    continue
                yield RevlogStoreEntry(
                    unencoded_path=f,
                    revlog_type=rl_type,
                    target_id=revlog_target_id,
                    is_revlog_main=bool(t & FILEFLAGS_REVLOG_MAIN),
                    is_volatile=bool(t & FILEFLAGS_VOLATILE),
                )

    def copylist(self):
        # names (relative to the repo root) that a clone has to copy
        d = (
            b'bookmarks',
            b'narrowspec',
            b'data',
            b'meta',
            b'dh',
            b'fncache',
            b'phaseroots',
            b'obsstore',
            b'00manifest.d',
            b'00manifest.i',
            b'00changelog.d',
            b'00changelog.i',
            b'requires',
        )
        return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        self.fncache.entries = None
        self.fncache.addls = set()

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        # whether the encoded form of `f` exists on disk
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except FileNotFoundError:
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = b"/".join((b"data", path))
        # check for files (exact match)
        e = path + b'.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith(b'/'):
            path += b'/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
General Comments 0
You need to be logged in to leave comments. Login now