store: document the decoding discrepancy in store.py...
marmoute
r48589:59bc92a7 stable
@@ -1,834 +1,839 @@
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Olivia Mackall <olivia@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import functools
12 12 import os
13 13 import re
14 14 import stat
15 15
16 16 from .i18n import _
17 17 from .pycompat import getattr
18 18 from .node import hex
19 19 from . import (
20 20 changelog,
21 21 error,
22 22 manifest,
23 23 policy,
24 24 pycompat,
25 25 util,
26 26 vfs as vfsmod,
27 27 )
28 28 from .utils import hashutil
29 29
30 30 parsers = policy.importmod('parsers')
31 31 # how many bytes should be read from fncache in one read
32 32 # It is done to prevent loading large fncache files into memory
33 33 fncache_chunksize = 10 ** 6
34 34
35 35
36 36 def _matchtrackedpath(path, matcher):
37 37 """parses a fncache entry and returns whether the entry is tracking a path
38 38 matched by matcher or not.
39 39
40 40 If matcher is None, returns True"""
41 41
42 42 if matcher is None:
43 43 return True
44 44 path = decodedir(path)
45 45 if path.startswith(b'data/'):
46 46 return matcher(path[len(b'data/') : -len(b'.i')])
47 47 elif path.startswith(b'meta/'):
48 48 return matcher.visitdir(path[len(b'meta/') : -len(b'/00manifest.i')])
49 49
50 50 raise error.ProgrammingError(b"cannot decode path %s" % path)
51 51
52 52
53 53 # This avoids a collision between a file named foo and a dir named
54 54 # foo.i or foo.d
55 55 def _encodedir(path):
56 56 """
57 57 >>> _encodedir(b'data/foo.i')
58 58 'data/foo.i'
59 59 >>> _encodedir(b'data/foo.i/bla.i')
60 60 'data/foo.i.hg/bla.i'
61 61 >>> _encodedir(b'data/foo.i.hg/bla.i')
62 62 'data/foo.i.hg.hg/bla.i'
63 63 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
64 64 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
65 65 """
66 66 return (
67 67 path.replace(b".hg/", b".hg.hg/")
68 68 .replace(b".i/", b".i.hg/")
69 69 .replace(b".d/", b".d.hg/")
70 70 )
71 71
72 72
73 73 encodedir = getattr(parsers, 'encodedir', _encodedir)
74 74
75 75
76 76 def decodedir(path):
77 77 """
78 78 >>> decodedir(b'data/foo.i')
79 79 'data/foo.i'
80 80 >>> decodedir(b'data/foo.i.hg/bla.i')
81 81 'data/foo.i/bla.i'
82 82 >>> decodedir(b'data/foo.i.hg.hg/bla.i')
83 83 'data/foo.i.hg/bla.i'
84 84 """
85 85 if b".hg/" not in path:
86 86 return path
87 87 return (
88 88 path.replace(b".d.hg/", b".d/")
89 89 .replace(b".i.hg/", b".i/")
90 90 .replace(b".hg.hg/", b".hg/")
91 91 )
92 92
93 93
94 94 def _reserved():
95 95 """characters that are problematic for filesystems
96 96
97 97 * ascii escapes (0..31)
98 98 * ascii hi (126..255)
99 99 * windows specials
100 100
101 101 these characters will be escaped by the encode functions
102 102 """
103 103 winreserved = [ord(x) for x in u'\\:*?"<>|']
104 104 for x in range(32):
105 105 yield x
106 106 for x in range(126, 256):
107 107 yield x
108 108 for x in winreserved:
109 109 yield x
110 110
111 111
112 112 def _buildencodefun():
113 113 """
114 114 >>> enc, dec = _buildencodefun()
115 115
116 116 >>> enc(b'nothing/special.txt')
117 117 'nothing/special.txt'
118 118 >>> dec(b'nothing/special.txt')
119 119 'nothing/special.txt'
120 120
121 121 >>> enc(b'HELLO')
122 122 '_h_e_l_l_o'
123 123 >>> dec(b'_h_e_l_l_o')
124 124 'HELLO'
125 125
126 126 >>> enc(b'hello:world?')
127 127 'hello~3aworld~3f'
128 128 >>> dec(b'hello~3aworld~3f')
129 129 'hello:world?'
130 130
131 131 >>> enc(b'the\\x07quick\\xADshot')
132 132 'the~07quick~adshot'
133 133 >>> dec(b'the~07quick~adshot')
134 134 'the\\x07quick\\xadshot'
135 135 """
136 136 e = b'_'
137 137 xchr = pycompat.bytechr
138 138 asciistr = list(map(xchr, range(127)))
139 139 capitals = list(range(ord(b"A"), ord(b"Z") + 1))
140 140
141 141 cmap = {x: x for x in asciistr}
142 142 for x in _reserved():
143 143 cmap[xchr(x)] = b"~%02x" % x
144 144 for x in capitals + [ord(e)]:
145 145 cmap[xchr(x)] = e + xchr(x).lower()
146 146
147 147 dmap = {}
148 148 for k, v in pycompat.iteritems(cmap):
149 149 dmap[v] = k
150 150
151 151 def decode(s):
152 152 i = 0
153 153 while i < len(s):
154 154 for l in pycompat.xrange(1, 4):
155 155 try:
156 156 yield dmap[s[i : i + l]]
157 157 i += l
158 158 break
159 159 except KeyError:
160 160 pass
161 161 else:
162 162 raise KeyError
163 163
164 164 return (
165 165 lambda s: b''.join(
166 166 [cmap[s[c : c + 1]] for c in pycompat.xrange(len(s))]
167 167 ),
168 168 lambda s: b''.join(list(decode(s))),
169 169 )
170 170
171 171
172 172 _encodefname, _decodefname = _buildencodefun()
173 173
174 174
175 175 def encodefilename(s):
176 176 """
177 177 >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
178 178 'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
179 179 """
180 180 return _encodefname(encodedir(s))
181 181
182 182
183 183 def decodefilename(s):
184 184 """
185 185 >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
186 186 'foo.i/bar.d/bla.hg/hi:world?/HELLO'
187 187 """
188 188 return decodedir(_decodefname(s))
189 189
190 190
191 191 def _buildlowerencodefun():
192 192 """
193 193 >>> f = _buildlowerencodefun()
194 194 >>> f(b'nothing/special.txt')
195 195 'nothing/special.txt'
196 196 >>> f(b'HELLO')
197 197 'hello'
198 198 >>> f(b'hello:world?')
199 199 'hello~3aworld~3f'
200 200 >>> f(b'the\\x07quick\\xADshot')
201 201 'the~07quick~adshot'
202 202 """
203 203 xchr = pycompat.bytechr
204 204 cmap = {xchr(x): xchr(x) for x in pycompat.xrange(127)}
205 205 for x in _reserved():
206 206 cmap[xchr(x)] = b"~%02x" % x
207 207 for x in range(ord(b"A"), ord(b"Z") + 1):
208 208 cmap[xchr(x)] = xchr(x).lower()
209 209
210 210 def lowerencode(s):
211 211 return b"".join([cmap[c] for c in pycompat.iterbytestr(s)])
212 212
213 213 return lowerencode
214 214
215 215
216 216 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
217 217
218 218 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
219 219 _winres3 = (b'aux', b'con', b'prn', b'nul') # length 3
220 220 _winres4 = (b'com', b'lpt') # length 4 (with trailing 1..9)
221 221
222 222
223 223 def _auxencode(path, dotencode):
224 224 """
225 225 Encodes filenames containing names reserved by Windows or which end in
226 226 period or space. Does not touch other single reserved characters c.
227 227 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
228 228 Additionally encodes space or period at the beginning, if dotencode is
229 229 True. Parameter path is assumed to be all lowercase.
230 230 A segment only needs encoding if a reserved name appears as a
231 231 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
232 232 doesn't need encoding.
233 233
234 234 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
235 235 >>> _auxencode(s.split(b'/'), True)
236 236 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
237 237 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
238 238 >>> _auxencode(s.split(b'/'), False)
239 239 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
240 240 >>> _auxencode([b'foo. '], True)
241 241 ['foo.~20']
242 242 >>> _auxencode([b' .foo'], True)
243 243 ['~20.foo']
244 244 """
245 245 for i, n in enumerate(path):
246 246 if not n:
247 247 continue
248 248 if dotencode and n[0] in b'. ':
249 249 n = b"~%02x" % ord(n[0:1]) + n[1:]
250 250 path[i] = n
251 251 else:
252 252 l = n.find(b'.')
253 253 if l == -1:
254 254 l = len(n)
255 255 if (l == 3 and n[:3] in _winres3) or (
256 256 l == 4
257 257 and n[3:4] <= b'9'
258 258 and n[3:4] >= b'1'
259 259 and n[:3] in _winres4
260 260 ):
261 261 # encode third letter ('aux' -> 'au~78')
262 262 ec = b"~%02x" % ord(n[2:3])
263 263 n = n[0:2] + ec + n[3:]
264 264 path[i] = n
265 265 if n[-1] in b'. ':
266 266 # encode last period or space ('foo...' -> 'foo..~2e')
267 267 path[i] = n[:-1] + b"~%02x" % ord(n[-1:])
268 268 return path
269 269
270 270
271 271 _maxstorepathlen = 120
272 272 _dirprefixlen = 8
273 273 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
274 274
275 275
276 276 def _hashencode(path, dotencode):
277 277 digest = hex(hashutil.sha1(path).digest())
278 278 le = lowerencode(path[5:]).split(b'/') # skips prefix 'data/' or 'meta/'
279 279 parts = _auxencode(le, dotencode)
280 280 basename = parts[-1]
281 281 _root, ext = os.path.splitext(basename)
282 282 sdirs = []
283 283 sdirslen = 0
284 284 for p in parts[:-1]:
285 285 d = p[:_dirprefixlen]
286 286 if d[-1] in b'. ':
287 287 # Windows can't access dirs ending in period or space
288 288 d = d[:-1] + b'_'
289 289 if sdirslen == 0:
290 290 t = len(d)
291 291 else:
292 292 t = sdirslen + 1 + len(d)
293 293 if t > _maxshortdirslen:
294 294 break
295 295 sdirs.append(d)
296 296 sdirslen = t
297 297 dirs = b'/'.join(sdirs)
298 298 if len(dirs) > 0:
299 299 dirs += b'/'
300 300 res = b'dh/' + dirs + digest + ext
301 301 spaceleft = _maxstorepathlen - len(res)
302 302 if spaceleft > 0:
303 303 filler = basename[:spaceleft]
304 304 res = b'dh/' + dirs + filler + digest + ext
305 305 return res
306 306
307 307
308 308 def _hybridencode(path, dotencode):
309 309 """encodes path with a length limit
310 310
311 311 Encodes all paths that begin with 'data/', according to the following.
312 312
313 313 Default encoding (reversible):
314 314
315 315 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
316 316 characters are encoded as '~xx', where xx is the two digit hex code
317 317 of the character (see encodefilename).
318 318 Relevant path components consisting of Windows reserved filenames are
319 319 masked by encoding the third character ('aux' -> 'au~78', see _auxencode).
320 320
321 321 Hashed encoding (not reversible):
322 322
323 323 If the default-encoded path is longer than _maxstorepathlen, a
324 324 non-reversible hybrid hashing of the path is done instead.
325 325 This encoding uses up to _dirprefixlen characters of all directory
326 326 levels of the lowerencoded path, but not more levels than can fit into
327 327 _maxshortdirslen.
328 328 Then follows the filler followed by the sha digest of the full path.
329 329 The filler is the beginning of the basename of the lowerencoded path
330 330 (the basename is everything after the last path separator). The filler
331 331 is as long as possible, filling in characters from the basename until
332 332 the encoded path has _maxstorepathlen characters (or all chars of the
333 333 basename have been taken).
334 334 The extension (e.g. '.i' or '.d') is preserved.
335 335
336 336 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
337 337 encoding was used.
338 338 """
339 339 path = encodedir(path)
340 340 ef = _encodefname(path).split(b'/')
341 341 res = b'/'.join(_auxencode(ef, dotencode))
342 342 if len(res) > _maxstorepathlen:
343 343 res = _hashencode(path, dotencode)
344 344 return res
345 345
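# Illustration (not part of the original file): the two regimes described in
# the _hybridencode docstring above. A path whose default encoding stays
# within _maxstorepathlen (120) keeps the reversible form, while a longer one
# falls back to the non-reversible 'dh/' hashed form:
#
#   >>> _hybridencode(b'data/FOO.txt', True)
#   'data/_f_o_o.txt'
#   >>> p = b'data/' + b'x' * 200 + b'.i'
#   >>> _hybridencode(p, True).startswith(b'dh/')
#   True
#   >>> len(_hybridencode(p, True)) <= _maxstorepathlen
#   True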
346 346
347 347 def _pathencode(path):
348 348 de = encodedir(path)
349 349 if len(path) > _maxstorepathlen:
350 350 return _hashencode(de, True)
351 351 ef = _encodefname(de).split(b'/')
352 352 res = b'/'.join(_auxencode(ef, True))
353 353 if len(res) > _maxstorepathlen:
354 354 return _hashencode(de, True)
355 355 return res
356 356
357 357
358 358 _pathencode = getattr(parsers, 'pathencode', _pathencode)
359 359
360 360
361 361 def _plainhybridencode(f):
362 362 return _hybridencode(f, False)
363 363
364 364
365 365 def _calcmode(vfs):
366 366 try:
367 367 # files in .hg/ will be created using this mode
368 368 mode = vfs.stat().st_mode
369 369 # avoid some useless chmods
370 370 if (0o777 & ~util.umask) == (0o777 & mode):
371 371 mode = None
372 372 except OSError:
373 373 mode = None
374 374 return mode
375 375
376 376
377 377 _data = [
378 378 b'bookmarks',
379 379 b'narrowspec',
380 380 b'data',
381 381 b'meta',
382 382 b'00manifest.d',
383 383 b'00manifest.i',
384 384 b'00changelog.d',
385 385 b'00changelog.i',
386 386 b'phaseroots',
387 387 b'obsstore',
388 388 b'requires',
389 389 ]
390 390
391 391 REVLOG_FILES_MAIN_EXT = (b'.i', b'i.tmpcensored')
392 392 REVLOG_FILES_OTHER_EXT = (
393 393 b'.idx',
394 394 b'.d',
395 395 b'.dat',
396 396 b'.n',
397 397 b'.nd',
398 398 b'.sda',
399 399 b'd.tmpcensored',
400 400 )
401 401 # files that are "volatile" and might change between listing and streaming
402 402 #
403 403 # note: the ".nd" files are nodemap data and won't "change" but they might be
404 404 # deleted.
405 405 REVLOG_FILES_VOLATILE_EXT = (b'.n', b'.nd')
406 406
407 407 # some exception to the above matching
408 408 #
409 409 # XXX This is currently not in use because of issue6542
410 410 EXCLUDED = re.compile(b'.*undo\.[^/]+\.(nd?|i)$')
411 411
412 412
413 413 def is_revlog(f, kind, st):
414 414 if kind != stat.S_IFREG:
415 415 return None
416 416 return revlog_type(f)
417 417
418 418
419 419 def revlog_type(f):
420 420 # XXX we need to filter `undo.` files created by the transaction here, however
421 421 # being naive about it also filters revlogs for `undo.*` files, leading to
422 422 # issue6542. So we no longer use EXCLUDED.
423 423 if f.endswith(REVLOG_FILES_MAIN_EXT):
424 424 return FILEFLAGS_REVLOG_MAIN
425 425 elif f.endswith(REVLOG_FILES_OTHER_EXT):
426 426 t = FILETYPE_FILELOG_OTHER
427 427 if f.endswith(REVLOG_FILES_VOLATILE_EXT):
428 428 t |= FILEFLAGS_VOLATILE
429 429 return t
430 430 return None
431 431
432 432
433 433 # the file is part of changelog data
434 434 FILEFLAGS_CHANGELOG = 1 << 13
435 435 # the file is part of manifest data
436 436 FILEFLAGS_MANIFESTLOG = 1 << 12
437 437 # the file is part of filelog data
438 438 FILEFLAGS_FILELOG = 1 << 11
439 439 # files that are not directly part of a revlog
440 440 FILEFLAGS_OTHER = 1 << 10
441 441
442 442 # the main entry point for a revlog
443 443 FILEFLAGS_REVLOG_MAIN = 1 << 1
444 444 # a secondary file for a revlog
445 445 FILEFLAGS_REVLOG_OTHER = 1 << 0
446 446
447 447 # files that are "volatile" and might change between listing and streaming
448 448 FILEFLAGS_VOLATILE = 1 << 20
449 449
450 450 FILETYPE_CHANGELOG_MAIN = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_MAIN
451 451 FILETYPE_CHANGELOG_OTHER = FILEFLAGS_CHANGELOG | FILEFLAGS_REVLOG_OTHER
452 452 FILETYPE_MANIFESTLOG_MAIN = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_MAIN
453 453 FILETYPE_MANIFESTLOG_OTHER = FILEFLAGS_MANIFESTLOG | FILEFLAGS_REVLOG_OTHER
454 454 FILETYPE_FILELOG_MAIN = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_MAIN
455 455 FILETYPE_FILELOG_OTHER = FILEFLAGS_FILELOG | FILEFLAGS_REVLOG_OTHER
456 456 FILETYPE_OTHER = FILEFLAGS_OTHER
457 457
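# Illustration (not in the original file): the FILETYPE_* values above are
# plain bitwise combinations, so consumers of walk()/datafiles() can mask the
# first tuple item to recover the category and the main/other/volatile bits:
#
#   >>> bool(FILETYPE_CHANGELOG_MAIN & FILEFLAGS_CHANGELOG)
#   True
#   >>> bool(FILETYPE_CHANGELOG_MAIN & FILEFLAGS_REVLOG_MAIN)
#   True
#   >>> t = FILETYPE_FILELOG_OTHER | FILEFLAGS_VOLATILE  # what revlog_type() returns for '.nd'
#   >>> bool(t & FILEFLAGS_VOLATILE)
#   True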
458 458
459 459 class basicstore(object):
460 460 '''base class for local repository stores'''
461 461
462 462 def __init__(self, path, vfstype):
463 463 vfs = vfstype(path)
464 464 self.path = vfs.base
465 465 self.createmode = _calcmode(vfs)
466 466 vfs.createmode = self.createmode
467 467 self.rawvfs = vfs
468 468 self.vfs = vfsmod.filtervfs(vfs, encodedir)
469 469 self.opener = self.vfs
470 470
471 471 def join(self, f):
472 472 return self.path + b'/' + encodedir(f)
473 473
474 474 def _walk(self, relpath, recurse):
475 475 '''yields (unencoded, encoded, size)'''
476 476 path = self.path
477 477 if relpath:
478 478 path += b'/' + relpath
479 479 striplen = len(self.path) + 1
480 480 l = []
481 481 if self.rawvfs.isdir(path):
482 482 visit = [path]
483 483 readdir = self.rawvfs.readdir
484 484 while visit:
485 485 p = visit.pop()
486 486 for f, kind, st in readdir(p, stat=True):
487 487 fp = p + b'/' + f
488 488 rl_type = is_revlog(f, kind, st)
489 489 if rl_type is not None:
490 490 n = util.pconvert(fp[striplen:])
491 491 l.append((rl_type, decodedir(n), n, st.st_size))
492 492 elif kind == stat.S_IFDIR and recurse:
493 493 visit.append(fp)
494 494 l.sort()
495 495 return l
496 496
497 497 def changelog(self, trypending, concurrencychecker=None):
498 498 return changelog.changelog(
499 499 self.vfs,
500 500 trypending=trypending,
501 501 concurrencychecker=concurrencychecker,
502 502 )
503 503
504 504 def manifestlog(self, repo, storenarrowmatch):
505 505 rootstore = manifest.manifestrevlog(repo.nodeconstants, self.vfs)
506 506 return manifest.manifestlog(self.vfs, repo, rootstore, storenarrowmatch)
507 507
508 508 def datafiles(self, matcher=None):
509 509 files = self._walk(b'data', True) + self._walk(b'meta', True)
510 510 for (t, u, e, s) in files:
511 511 yield (FILEFLAGS_FILELOG | t, u, e, s)
512 512
513 513 def topfiles(self):
514 514 # yield manifest before changelog
515 515 files = reversed(self._walk(b'', False))
516 516 for (t, u, e, s) in files:
517 517 if u.startswith(b'00changelog'):
518 518 yield (FILEFLAGS_CHANGELOG | t, u, e, s)
519 519 elif u.startswith(b'00manifest'):
520 520 yield (FILEFLAGS_MANIFESTLOG | t, u, e, s)
521 521 else:
522 522 yield (FILETYPE_OTHER | t, u, e, s)
523 523
524 524 def walk(self, matcher=None):
525 525 """return file related to data storage (ie: revlogs)
526 526
527 527 yields (file_type, unencoded, encoded, size)
528 528
529 529 if a matcher is passed, only storage files of tracked paths matched by
530 530 the matcher are yielded
531 531 """
532 532 # yield data files first
533 533 for x in self.datafiles(matcher):
534 534 yield x
535 535 for x in self.topfiles():
536 536 yield x
537 537
538 538 def copylist(self):
539 539 return _data
540 540
541 541 def write(self, tr):
542 542 pass
543 543
544 544 def invalidatecaches(self):
545 545 pass
546 546
547 547 def markremoved(self, fn):
548 548 pass
549 549
550 550 def __contains__(self, path):
551 551 '''Checks if the store contains path'''
552 552 path = b"/".join((b"data", path))
553 553 # file?
554 554 if self.vfs.exists(path + b".i"):
555 555 return True
556 556 # dir?
557 557 if not path.endswith(b"/"):
558 558 path = path + b"/"
559 559 return self.vfs.exists(path)
560 560
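# Usage sketch (not in the original file), assuming `store` is an instance of
# any of the store classes in this module: walk()/datafiles() yield
# (file_type, unencoded, encoded, size) tuples, so callers typically unpack
# them and mask the flags, e.g.:
#
#   total = 0
#   for file_type, unencoded, encoded, size in store.walk():
#       if file_type & FILEFLAGS_VOLATILE:
#           continue  # may change or vanish between listing and streaming
#       total += size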
561 561
562 562 class encodedstore(basicstore):
563 563 def __init__(self, path, vfstype):
564 564 vfs = vfstype(path + b'/store')
565 565 self.path = vfs.base
566 566 self.createmode = _calcmode(vfs)
567 567 vfs.createmode = self.createmode
568 568 self.rawvfs = vfs
569 569 self.vfs = vfsmod.filtervfs(vfs, encodefilename)
570 570 self.opener = self.vfs
571 571
572 # note: topfiles would also need a decode phase. It is just that in
573 # practice we do not have any files outside of `data/` that need encoding.
574 # However, that might change, so we should probably add a test and
575 # encoding/decoding for it too. See issue6548.
576
572 577 def datafiles(self, matcher=None):
573 578 for t, a, b, size in super(encodedstore, self).datafiles():
574 579 try:
575 580 a = decodefilename(a)
576 581 except KeyError:
577 582 a = None
578 583 if a is not None and not _matchtrackedpath(a, matcher):
579 584 continue
580 585 yield t, a, b, size
581 586
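    # Illustration (hypothetical, not part of this changeset): per the note
    # above (issue6548), a decode phase for topfiles() could mirror the
    # datafiles() override, roughly:
    #
    #   def topfiles(self):
    #       for t, a, b, size in super(encodedstore, self).topfiles():
    #           try:
    #               a = decodefilename(a)
    #           except KeyError:
    #               a = None
    #           yield t, a, b, size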
582 587 def join(self, f):
583 588 return self.path + b'/' + encodefilename(f)
584 589
585 590 def copylist(self):
586 591 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in _data]
587 592
588 593
589 594 class fncache(object):
590 595 # the filename used to be partially encoded
591 596 # hence the encodedir/decodedir dance
592 597 def __init__(self, vfs):
593 598 self.vfs = vfs
594 599 self.entries = None
595 600 self._dirty = False
596 601 # set of new additions to fncache
597 602 self.addls = set()
598 603
599 604 def ensureloaded(self, warn=None):
600 605 """read the fncache file if not already read.
601 606
602 607 If the file on disk is corrupted, raise. If warn is provided,
603 608 warn and keep going instead."""
604 609 if self.entries is None:
605 610 self._load(warn)
606 611
607 612 def _load(self, warn=None):
608 613 '''fill the entries from the fncache file'''
609 614 self._dirty = False
610 615 try:
611 616 fp = self.vfs(b'fncache', mode=b'rb')
612 617 except IOError:
613 618 # skip nonexistent file
614 619 self.entries = set()
615 620 return
616 621
617 622 self.entries = set()
618 623 chunk = b''
619 624 for c in iter(functools.partial(fp.read, fncache_chunksize), b''):
620 625 chunk += c
621 626 try:
622 627 p = chunk.rindex(b'\n')
623 628 self.entries.update(decodedir(chunk[: p + 1]).splitlines())
624 629 chunk = chunk[p + 1 :]
625 630 except ValueError:
626 631 # substring '\n' not found, maybe the entry is bigger than the
627 632 # chunksize, so let's keep iterating
628 633 pass
629 634
630 635 if chunk:
631 636 msg = _(b"fncache does not end with a newline")
632 637 if warn:
633 638 warn(msg + b'\n')
634 639 else:
635 640 raise error.Abort(
636 641 msg,
637 642 hint=_(
638 643 b"use 'hg debugrebuildfncache' to "
639 644 b"rebuild the fncache"
640 645 ),
641 646 )
642 647 self._checkentries(fp, warn)
643 648 fp.close()
644 649
645 650 def _checkentries(self, fp, warn):
646 651 """make sure there is no empty string in entries"""
647 652 if b'' in self.entries:
648 653 fp.seek(0)
649 654 for n, line in enumerate(util.iterfile(fp)):
650 655 if not line.rstrip(b'\n'):
651 656 t = _(b'invalid entry in fncache, line %d') % (n + 1)
652 657 if warn:
653 658 warn(t + b'\n')
654 659 else:
655 660 raise error.Abort(t)
656 661
657 662 def write(self, tr):
658 663 if self._dirty:
659 664 assert self.entries is not None
660 665 self.entries = self.entries | self.addls
661 666 self.addls = set()
662 667 tr.addbackup(b'fncache')
663 668 fp = self.vfs(b'fncache', mode=b'wb', atomictemp=True)
664 669 if self.entries:
665 670 fp.write(encodedir(b'\n'.join(self.entries) + b'\n'))
666 671 fp.close()
667 672 self._dirty = False
668 673 if self.addls:
669 674 # if we have just new entries, let's append them to the fncache
670 675 tr.addbackup(b'fncache')
671 676 fp = self.vfs(b'fncache', mode=b'ab', atomictemp=True)
672 677 if self.addls:
673 678 fp.write(encodedir(b'\n'.join(self.addls) + b'\n'))
674 679 fp.close()
675 680 self.entries = None
676 681 self.addls = set()
677 682
678 683 def add(self, fn):
679 684 if self.entries is None:
680 685 self._load()
681 686 if fn not in self.entries:
682 687 self.addls.add(fn)
683 688
684 689 def remove(self, fn):
685 690 if self.entries is None:
686 691 self._load()
687 692 if fn in self.addls:
688 693 self.addls.remove(fn)
689 694 return
690 695 try:
691 696 self.entries.remove(fn)
692 697 self._dirty = True
693 698 except KeyError:
694 699 pass
695 700
696 701 def __contains__(self, fn):
697 702 if fn in self.addls:
698 703 return True
699 704 if self.entries is None:
700 705 self._load()
701 706 return fn in self.entries
702 707
703 708 def __iter__(self):
704 709 if self.entries is None:
705 710 self._load()
706 711 return iter(self.entries | self.addls)
707 712
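# Usage sketch (not in the original file): fncachestore below wires a fncache
# instance into its vfs; additions are buffered in `addls` and only merged
# into `entries` and persisted when write(tr) runs at transaction close,
# roughly:
#
#   fnc = fncache(vfs)
#   fnc.add(b'data/foo.i')      # buffered in fnc.addls until write()
#   b'data/foo.i' in fnc        # True, addls is checked before entries
#   fnc.write(tr)               # backs up and rewrites/appends 'fncache'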
708 713
709 714 class _fncachevfs(vfsmod.proxyvfs):
710 715 def __init__(self, vfs, fnc, encode):
711 716 vfsmod.proxyvfs.__init__(self, vfs)
712 717 self.fncache = fnc
713 718 self.encode = encode
714 719
715 720 def __call__(self, path, mode=b'r', *args, **kw):
716 721 encoded = self.encode(path)
717 722 if mode not in (b'r', b'rb') and (
718 723 path.startswith(b'data/') or path.startswith(b'meta/')
719 724 ):
720 725 # do not trigger a fncache load when adding a file that already is
721 726 # known to exist.
722 727 notload = self.fncache.entries is None and self.vfs.exists(encoded)
723 728 if notload and b'r+' in mode and not self.vfs.stat(encoded).st_size:
724 729 # when appending to an existing file, if the file has size zero,
725 730 # it should be considered as missing. Such zero-size files are
726 731 # the result of truncation when a transaction is aborted.
727 732 notload = False
728 733 if not notload:
729 734 self.fncache.add(path)
730 735 return self.vfs(encoded, mode, *args, **kw)
731 736
732 737 def join(self, path):
733 738 if path:
734 739 return self.vfs.join(self.encode(path))
735 740 else:
736 741 return self.vfs.join(path)
737 742
738 743 def register_file(self, path):
739 744 """generic hook point to lets fncache steer its stew"""
740 745 if path.startswith(b'data/') or path.startswith(b'meta/'):
741 746 self.fncache.add(path)
742 747
743 748
744 749 class fncachestore(basicstore):
745 750 def __init__(self, path, vfstype, dotencode):
746 751 if dotencode:
747 752 encode = _pathencode
748 753 else:
749 754 encode = _plainhybridencode
750 755 self.encode = encode
751 756 vfs = vfstype(path + b'/store')
752 757 self.path = vfs.base
753 758 self.pathsep = self.path + b'/'
754 759 self.createmode = _calcmode(vfs)
755 760 vfs.createmode = self.createmode
756 761 self.rawvfs = vfs
757 762 fnc = fncache(vfs)
758 763 self.fncache = fnc
759 764 self.vfs = _fncachevfs(vfs, fnc, encode)
760 765 self.opener = self.vfs
761 766
762 767 def join(self, f):
763 768 return self.pathsep + self.encode(f)
764 769
765 770 def getsize(self, path):
766 771 return self.rawvfs.stat(path).st_size
767 772
768 773 def datafiles(self, matcher=None):
769 774 for f in sorted(self.fncache):
770 775 if not _matchtrackedpath(f, matcher):
771 776 continue
772 777 ef = self.encode(f)
773 778 try:
774 779 t = revlog_type(f)
775 780 assert t is not None, f
776 781 t |= FILEFLAGS_FILELOG
777 782 yield t, f, ef, self.getsize(ef)
778 783 except OSError as err:
779 784 if err.errno != errno.ENOENT:
780 785 raise
781 786
782 787 def copylist(self):
783 788 d = (
784 789 b'bookmarks',
785 790 b'narrowspec',
786 791 b'data',
787 792 b'meta',
788 793 b'dh',
789 794 b'fncache',
790 795 b'phaseroots',
791 796 b'obsstore',
792 797 b'00manifest.d',
793 798 b'00manifest.i',
794 799 b'00changelog.d',
795 800 b'00changelog.i',
796 801 b'requires',
797 802 )
798 803 return [b'requires', b'00changelog.i'] + [b'store/' + f for f in d]
799 804
800 805 def write(self, tr):
801 806 self.fncache.write(tr)
802 807
803 808 def invalidatecaches(self):
804 809 self.fncache.entries = None
805 810 self.fncache.addls = set()
806 811
807 812 def markremoved(self, fn):
808 813 self.fncache.remove(fn)
809 814
810 815 def _exists(self, f):
811 816 ef = self.encode(f)
812 817 try:
813 818 self.getsize(ef)
814 819 return True
815 820 except OSError as err:
816 821 if err.errno != errno.ENOENT:
817 822 raise
818 823 # nonexistent entry
819 824 return False
820 825
821 826 def __contains__(self, path):
822 827 '''Checks if the store contains path'''
823 828 path = b"/".join((b"data", path))
824 829 # check for files (exact match)
825 830 e = path + b'.i'
826 831 if e in self.fncache and self._exists(e):
827 832 return True
828 833 # now check for directories (prefix match)
829 834 if not path.endswith(b'/'):
830 835 path += b'/'
831 836 for e in self.fncache:
832 837 if e.startswith(path) and self._exists(e):
833 838 return True
834 839 return False
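# Summary sketch (not in the original file): the three store flavours above
# differ mainly in how paths are encoded on disk:
#
#   basicstore    -> filtervfs(vfs, encodedir)       # only dir-collision escaping
#   encodedstore  -> filtervfs(vfs, encodefilename)  # reversible filename encoding
#   fncachestore  -> _fncachevfs(vfs, fncache(vfs),
#                                _pathencode or _plainhybridencode)
#                    # hashed long paths plus an fncache index of entries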