##// END OF EJS Templates
store: pass matcher to store.datafiles()...
Pulkit Goyal -
r40376:2d45b549 default
parent child Browse files
Show More
@@ -1,587 +1,591 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 node,
19 19 policy,
20 20 pycompat,
21 21 util,
22 22 vfs as vfsmod,
23 23 )
24 24
# Load the 'parsers' module through the policy layer. The getattr() lookups
# further down fall back to the pure-Python helpers of this file whenever the
# loaded module lacks the corresponding attribute.
parsers = policy.importmod(r'parsers')
26 26
27 27 # This avoids a collision between a file named foo and a dir named
28 28 # foo.i or foo.d
29 29 def _encodedir(path):
30 30 '''
31 31 >>> _encodedir(b'data/foo.i')
32 32 'data/foo.i'
33 33 >>> _encodedir(b'data/foo.i/bla.i')
34 34 'data/foo.i.hg/bla.i'
35 35 >>> _encodedir(b'data/foo.i.hg/bla.i')
36 36 'data/foo.i.hg.hg/bla.i'
37 37 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
38 38 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
39 39 '''
40 40 return (path
41 41 .replace(".hg/", ".hg.hg/")
42 42 .replace(".i/", ".i.hg/")
43 43 .replace(".d/", ".d.hg/"))
44 44
# Use parsers.encodedir when the loaded module provides it; otherwise fall
# back to the pure-Python _encodedir.
encodedir = getattr(parsers, 'encodedir', _encodedir)
46 46
def decodedir(path):
    '''
    Undo the directory escaping that appends ".hg" to components ending in
    ".hg", ".i" or ".d".

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # every escaped component contains ".hg/", so anything without it can be
    # returned untouched
    if ".hg/" in path:
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
62 62
63 63 def _reserved():
64 64 ''' characters that are problematic for filesystems
65 65
66 66 * ascii escapes (0..31)
67 67 * ascii hi (126..255)
68 68 * windows specials
69 69
70 70 these characters will be escaped by encodefunctions
71 71 '''
72 72 winreserved = [ord(x) for x in u'\\:*?"<>|']
73 73 for x in range(32):
74 74 yield x
75 75 for x in range(126, 256):
76 76 yield x
77 77 for x in winreserved:
78 78 yield x
79 79
def _buildencodefun():
    '''
    Build and return the (encode, decode) pair used for store file names.

    Uppercase letters and "_" are encoded as "_" followed by the lowercased
    character; reserved characters (see _reserved) become "~xx".

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    tochr = pycompat.bytechr

    # start from the identity mapping for the 7-bit range ...
    encmap = dict((ch, ch) for ch in map(tochr, range(127)))
    # ... then override reserved characters with their "~xx" form ...
    for code in _reserved():
        encmap[tochr(code)] = "~%02x" % code
    # ... and finally capitals and the escape character itself
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        encmap[tochr(code)] = escape + tochr(code).lower()

    # reverse table: encoded token (1 to 3 characters) -> original character
    decmap = dict((token, ch) for ch, token in encmap.iteritems())

    def _decodetokens(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first, exactly one token per iteration
            for width in pycompat.xrange(1, 4):
                token = s[pos:pos + width]
                if token in decmap:
                    yield decmap[token]
                    pos += width
                    break
            else:
                # no token of any width matches at this position
                raise KeyError

    def encode(s):
        return ''.join([encmap[s[idx:idx + 1]]
                        for idx in pycompat.xrange(len(s))])

    def decode(s):
        return ''.join(list(_decodetokens(s)))

    return encode, decode
133 133
# Build the file name encoder/decoder pair once, at import time.
_encodefname, _decodefname = _buildencodefun()
135 135
def encodefilename(s):
    '''
    Encode a store path: escape directory names first (encodedir), then
    encode the individual characters (_encodefname).

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
142 142
def decodefilename(s):
    '''
    Reverse encodefilename: decode the characters first (_decodefname), then
    undo the directory escaping (decodedir).

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
149 149
def _buildlowerencodefun():
    '''
    Build the encoder that lowercases capitals and writes reserved
    characters (see _reserved) as "~xx".

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    '''
    tochr = pycompat.bytechr
    table = {}
    # identity mapping for the 7-bit range
    for code in pycompat.xrange(127):
        ch = tochr(code)
        table[ch] = ch
    # reserved characters are written as "~xx"
    for code in _reserved():
        table[tochr(code)] = "~%02x" % code
    # capitals are simply lowercased
    for code in range(ord("A"), ord("Z") + 1):
        table[tochr(code)] = tochr(code).lower()

    def lowerencode(s):
        encoded = [table[ch] for ch in pycompat.iterbytestr(s)]
        return "".join(encoded)

    return lowerencode
171 171
# Use parsers.lowerencode when it exists and is truthy; otherwise build the
# pure-Python, table-driven implementation.
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
173 173
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
# Both tuples are consulted by _auxencode, which compares them against the
# first three characters of a path segment's basename.
_winres3 = ('aux', 'con', 'prn', 'nul') # length 3
_winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    path is a list of path segments. It is modified in place and the same
    list object is returned.

    >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split(b'/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split(b'/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode([b'foo. '], True)
    ['foo.~20']
    >>> _auxencode([b' .foo'], True)
    ['~20.foo']
    '''
    for i, n in enumerate(path):
        # empty segments are left untouched
        if not n:
            continue
        if dotencode and n[0] in '. ':
            # leading period or space: replace it by its "~xx" form
            n = "~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # l is the length of the basename, i.e. the part before the
            # first period (the whole segment when there is none)
            l = n.find('.')
            if l == -1:
                l = len(n)
            if ((l == 3 and n[:3] in _winres3) or
                (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
                        and n[:3] in _winres4)):
                # encode third letter ('aux' -> 'au~78')
                ec = "~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        # this check runs on the possibly already rewritten segment
        if n[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + "~%02x" % ord(n[-1:])
    return path
220 220
# Encoded paths longer than this are stored using the hashed encoding.
_maxstorepathlen = 120
# Number of leading characters kept from each directory level of a hashed path.
_dirprefixlen = 8
# Upper bound on the total length of the shortened directory levels that
# _hashencode keeps in a hashed path.
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
224 224
def _hashencode(path, dotencode):
    '''Return the hashed, non-reversible 'dh/' store path for path.

    The result is made of shortened directory levels, as much of the
    basename as still fits into _maxstorepathlen, the sha1 hex digest of the
    full path and the original extension.
    '''
    sha = node.hex(hashlib.sha1(path).digest())
    # skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split('/')
    segments = _auxencode(lowered, dotencode)
    basename = segments[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    usedlen = 0
    for segment in segments[:-1]:
        short = segment[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        if usedlen == 0:
            newlen = len(short)
        else:
            # account for the separator joining this level to the previous
            newlen = usedlen + 1 + len(short)
            if newlen > _maxshortdirslen:
                break
        shortdirs.append(short)
        usedlen = newlen

    dirs = '/'.join(shortdirs)
    if len(dirs) > 0:
        dirs += '/'

    res = 'dh/' + dirs + sha + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # fill the remaining room with the beginning of the basename
        res = 'dh/' + dirs + basename[:spaceleft] + sha + ext
    return res
255 255
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    # too long for the reversible form: fall back to the hashed encoding
    return _hashencode(path, dotencode)
293 293
def _pathencode(path):
    '''Encode path with dot-encoding enabled, hashing over-long results.

    The hashed form is used when either the unencoded path or its
    default-encoded form is longer than _maxstorepathlen.
    '''
    de = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(de).split('/')
        res = '/'.join(_auxencode(segments, True))
        if len(res) <= _maxstorepathlen:
            return res
    return _hashencode(de, True)
303 303
# Use parsers.pathencode when the loaded module provides it; otherwise keep
# the pure-Python _pathencode defined just above.
_pathencode = getattr(parsers, 'pathencode', _pathencode)
305 305
def _plainhybridencode(f):
    '''Encode f with _hybridencode, with dot-encoding disabled.'''
    dotencode = False
    return _hybridencode(f, dotencode)
308 308
309 309 def _calcmode(vfs):
310 310 try:
311 311 # files in .hg/ will be created using this mode
312 312 mode = vfs.stat().st_mode
313 313 # avoid some useless chmods
314 314 if (0o777 & ~util.umask) == (0o777 & mode):
315 315 mode = None
316 316 except OSError:
317 317 mode = None
318 318 return mode
319 319
# Whitespace-separated names of store entries; the copylist() methods split
# this string to build their result.
_data = ('narrowspec data meta 00manifest.d 00manifest.i'
         ' 00changelog.d 00changelog.i phaseroots obsstore')
322 322
def isrevlog(f, kind, st):
    '''Tell whether f is a regular file whose name ends in '.i' or '.d'.

    st is accepted to match the file filter signature but is not used.
    '''
    if kind != stat.S_IFREG:
        return False
    return f[-2:] in ('.i', '.d')
325 325
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        vfs = vfstype(path)
        self.path = vfs.base
        # the same creation mode is recorded on the store and on the raw vfs
        self.createmode = vfs.createmode = _calcmode(vfs)
        self.rawvfs = vfs
        # all regular access goes through a vfs that escapes directory names
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the path of f below the store, with directories escaped'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse, filefilter=isrevlog):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Only entries accepted by filefilter are reported. Sub-directories
        are only visited when recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if not self.rawvfs.isdir(root):
            return found
        readdir = self.rawvfs.readdir
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                fullpath = current + '/' + name
                if filefilter(name, kind, st):
                    encoded = util.pconvert(fullpath[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(fullpath)
        found.sort()
        return found

    def datafiles(self, matcher=None):
        '''return the entries found under 'data' followed by those under 'meta'

        matcher is accepted as part of the interface; this implementation
        does not consult it.
        '''
        datapart = self._walk('data', True)
        metapart = self._walk('meta', True)
        return datapart + metapart

    def topfiles(self):
        '''return the entries found directly in the store, reverse sorted'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self, matcher=None):
        '''yields (unencoded, encoded, size)

        matcher is forwarded unchanged to datafiles(); the entries from
        topfiles() are always produced.
        '''
        # yield data files first
        for entry in self.datafiles(matcher):
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        '''return the list of names built from 'requires' and _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self, tr):
        '''no-op in the base store'''
        pass

    def invalidatecaches(self):
        '''no-op in the base store'''
        pass

    def markremoved(self, fn):
        '''no-op in the base store'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        if not path.endswith("/"):
            path += "/"
        return self.vfs.exists(path)
399 403
class encodedstore(basicstore):
    '''store living in <path>/store whose file names go through
    encodefilename'''
    def __init__(self, path, vfstype):
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        # the same creation mode is recorded on the store and on the raw vfs
        self.createmode = vfs.createmode = _calcmode(vfs)
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self, matcher=None):
        '''yield (unencoded, encoded, size) for every data file

        The unencoded name is None when the on-disk name cannot be decoded.
        matcher is accepted as part of the interface; this implementation
        does not consult it.
        '''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                decoded = decodefilename(name)
            except KeyError:
                # not produced by encodefilename: report it without a name
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the path of f below the store, fully encoded'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return 'requires', '00changelog.i' and the _data names below
        'store/' '''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
424 428
class fncache(object):
    '''lazily loaded set of the names recorded in the 'fncache' file

    entries is None until the file has been read; _dirty records whether the
    in-memory set differs from what was loaded and must be written back.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) yields an empty set. Raises
        error.Abort when the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # close the file even when reading or validation fails
            fp.close()

    def write(self, tr):
        '''write the entries back to the fncache file if they changed

        The file is registered with the transaction tr as a backup first.
        '''
        if self._dirty:
            assert self.entries is not None
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''record fn, marking the cache dirty if it was not known yet'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''forget fn; unknown names are silently ignored'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
486 490
class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs):
    '''vfs proxy that encodes paths and records written 'data/' and 'meta/'
    files in the fncache'''
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        encoded = self.encode(path)
        readonly = mode in ('r', 'rb')
        tracked = path.startswith('data/') or path.startswith('meta/')
        if not readonly and tracked:
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skipadd = self.fncache.entries is None and self.vfs.exists(encoded)
            if skipadd and 'a' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                skipadd = False
            if not skipadd:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # an empty path is handed to the wrapped vfs without encoding
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
514 518
class fncachestore(basicstore):
    '''store living in <path>/store that lists its data files in an fncache'''
    def __init__(self, path, vfstype, dotencode):
        # dotencode selects which of the two path encoders is used
        self.encode = encode = (_pathencode if dotencode
                                else _plainhybridencode)
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.pathsep = self.path + '/'
        # the same creation mode is recorded on the store and on the raw vfs
        self.createmode = vfs.createmode = _calcmode(vfs)
        self.rawvfs = vfs
        self.fncache = fnc = fncache(vfs)
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        '''return the encoded path of f below the store'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size reported by the raw vfs for the encoded path'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self, matcher=None):
        '''yield (unencoded, encoded, size) for the fncache entries, sorted

        Entries whose file does not exist (ENOENT) are skipped. matcher is
        accepted as part of the interface; this implementation does not
        consult it.
        '''
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                size = self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                continue
            yield name, encoded, size

    def copylist(self):
        '''return 'requires', '00changelog.i' and the store entries below
        'store/' '''
        d = ('narrowspec data meta dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in d.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self, tr):
        '''write the fncache through the transaction tr'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''drop the loaded fncache entries so they get reloaded on next use'''
        self.fncache.entries = None

    def markremoved(self, fn):
        '''remove fn from the fncache'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''tell whether the file behind the unencoded name f exists'''
        ef = self.encode(f)
        try:
            self.getsize(ef)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # check for files (exact match)
        candidate = path + '.i'
        if candidate in self.fncache and self._exists(candidate):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        return any(entry.startswith(path) and self._exists(entry)
                   for entry in self.fncache)
General Comments 0
You need to be logged in to leave comments. Login now