##// END OF EJS Templates
store: raise ProgrammingError if unable to decode a storage path...
Pulkit Goyal -
r40658:66adfd58 default
parent child Browse files
Show More
@@ -1,609 +1,611 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 node,
19 19 policy,
20 20 pycompat,
21 21 util,
22 22 vfs as vfsmod,
23 23 )
24 24
25 25 parsers = policy.importmod(r'parsers')
26 26
27 27 def _matchtrackedpath(path, matcher):
28 28 """parses a fncache entry and returns whether the entry is tracking a path
29 29 matched by matcher or not.
30 30
31 31 If matcher is None, returns True"""
32 32
33 33 if matcher is None:
34 34 return True
35 35 path = decodedir(path)
36 36 if path.startswith('data/'):
37 37 return matcher(path[len('data/'):-len('.i')])
38 38 elif path.startswith('meta/'):
39 39 return matcher.visitdir(path[len('meta/'):-len('/00manifest.i')] or '.')
40 40
41 raise error.ProgrammingError("cannot decode path %s" % path)
42
41 43 # This avoids a collision between a file named foo and a dir named
42 44 # foo.i or foo.d
43 45 def _encodedir(path):
44 46 '''
45 47 >>> _encodedir(b'data/foo.i')
46 48 'data/foo.i'
47 49 >>> _encodedir(b'data/foo.i/bla.i')
48 50 'data/foo.i.hg/bla.i'
49 51 >>> _encodedir(b'data/foo.i.hg/bla.i')
50 52 'data/foo.i.hg.hg/bla.i'
51 53 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
52 54 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
53 55 '''
54 56 return (path
55 57 .replace(".hg/", ".hg.hg/")
56 58 .replace(".i/", ".i.hg/")
57 59 .replace(".d/", ".d.hg/"))
58 60
59 61 encodedir = getattr(parsers, 'encodedir', _encodedir)
60 62
def decodedir(path):
    '''
    undo _encodedir(): strip the '.hg' added after '.d', '.i' and '.hg'

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # fast path: nothing to undo when no directory name ends in '.hg'
    if ".hg/" not in path:
        return path
    decoded = path
    # reverse order of _encodedir(): '.hg.hg/' is collapsed last
    for suffix in ('.d', '.i', '.hg'):
        decoded = decoded.replace(suffix + '.hg/', suffix + '/')
    return decoded
76 78
77 79 def _reserved():
78 80 ''' characters that are problematic for filesystems
79 81
80 82 * ascii escapes (0..31)
81 83 * ascii hi (126..255)
82 84 * windows specials
83 85
84 86 these characters will be escaped by encodefunctions
85 87 '''
86 88 winreserved = [ord(x) for x in u'\\:*?"<>|']
87 89 for x in range(32):
88 90 yield x
89 91 for x in range(126, 256):
90 92 yield x
91 93 for x in winreserved:
92 94 yield x
93 95
def _buildencodefun():
    '''
    build the reversible (encode, decode) function pair for store filenames

    Uppercase letters and '_' are encoded as '_' followed by the lowercased
    character; every code produced by _reserved() is encoded as '~' plus two
    hex digits; all other codes below 127 map to themselves.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    tochr = pycompat.bytechr

    # identity mapping first, then overwrite the entries needing an escape
    encmap = dict((tochr(code), tochr(code)) for code in range(127))
    for code in _reserved():
        encmap[tochr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        ch = tochr(code)
        encmap[ch] = escape + ch.lower()

    # inverse table: encoded token (1 to 3 characters long) -> original
    decmap = dict((token, ch) for ch, token in encmap.iteritems())

    def decodetokens(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first: plain char, '_x', then '~xx'
            for width in pycompat.xrange(1, 4):
                token = s[pos:pos + width]
                if token in decmap:
                    yield decmap[token]
                    pos += width
                    break
            else:
                # no known token starts at this position
                raise KeyError

    def encode(s):
        return ''.join([encmap[s[c:c + 1]]
                        for c in pycompat.xrange(len(s))])

    def decode(s):
        return ''.join(list(decodetokens(s)))

    return encode, decode
147 149
148 150 _encodefname, _decodefname = _buildencodefun()
149 151
def encodefilename(s):
    '''
    apply encodedir() and then the reversible character encoding to s

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direncoded = encodedir(s)
    return _encodefname(direncoded)
156 158
def decodefilename(s):
    '''
    inverse of encodefilename(): undo the character encoding, then
    decodedir()

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
163 165
def _buildlowerencodefun():
    '''
    build the (non-reversible) lowercasing encoder

    Uppercase letters are lowercased, every code produced by _reserved() is
    encoded as '~' plus two hex digits, other codes below 127 are kept.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    '''
    tochr = pycompat.bytechr

    # identity mapping first, then overwrite the entries needing a change
    table = {}
    for code in pycompat.xrange(127):
        table[tochr(code)] = tochr(code)
    for code in _reserved():
        table[tochr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        upper = tochr(code)
        table[upper] = upper.lower()

    def lowerencode(s):
        return "".join([table[c] for c in pycompat.iterbytestr(s)])
    return lowerencode
185 187
186 188 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
187 189
188 190 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
189 191 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
190 192 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
191 193 def _auxencode(path, dotencode):
192 194 '''
193 195 Encodes filenames containing names reserved by Windows or which end in
194 196 period or space. Does not touch other single reserved characters c.
195 197 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
196 198 Additionally encodes space or period at the beginning, if dotencode is
197 199 True. Parameter path is assumed to be all lowercase.
198 200 A segment only needs encoding if a reserved name appears as a
199 201 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
200 202 doesn't need encoding.
201 203
202 204 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
203 205 >>> _auxencode(s.split(b'/'), True)
204 206 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
205 207 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
206 208 >>> _auxencode(s.split(b'/'), False)
207 209 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
208 210 >>> _auxencode([b'foo. '], True)
209 211 ['foo.~20']
210 212 >>> _auxencode([b' .foo'], True)
211 213 ['~20.foo']
212 214 '''
213 215 for i, n in enumerate(path):
214 216 if not n:
215 217 continue
216 218 if dotencode and n[0] in '. ':
217 219 n = "~%02x" % ord(n[0:1]) + n[1:]
218 220 path[i] = n
219 221 else:
220 222 l = n.find('.')
221 223 if l == -1:
222 224 l = len(n)
223 225 if ((l == 3 and n[:3] in _winres3) or
224 226 (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
225 227 and n[:3] in _winres4)):
226 228 # encode third letter ('aux' -> 'au~78')
227 229 ec = "~%02x" % ord(n[2:3])
228 230 n = n[0:2] + ec + n[3:]
229 231 path[i] = n
230 232 if n[-1] in '. ':
231 233 # encode last period or space ('foo...' -> 'foo..~2e')
232 234 path[i] = n[:-1] + "~%02x" % ord(n[-1:])
233 235 return path
234 236
# longest store path produced by the reversible encoding; longer results
# are replaced by the hashed form
_maxstorepathlen = 120
# number of characters kept from each directory level in the hashed form
_dirprefixlen = 8
# upper bound on the combined length of the shortened directory levels
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''build the non-reversible 'dh/' store name for path

    The result is 'dh/', the shortened directory levels, as much of the
    basename as fits within _maxstorepathlen, the sha1 hex digest of the
    full path, and the basename's extension.
    '''
    digest = node.hex(hashlib.sha1(path).digest())
    lowered = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/'
    parts = _auxencode(lowered, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    usedlen = 0
    for level in parts[:-1]:
        short = level[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        # every level after the first also costs one '/' separator
        if usedlen == 0:
            newlen = len(short)
        else:
            newlen = usedlen + 1 + len(short)
        if newlen > _maxshortdirslen:
            break
        shortdirs.append(short)
        usedlen = newlen

    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    prefix = 'dh/' + dirs
    suffix = digest + ext
    # pad with the start of the basename while there is room left
    spaceleft = _maxstorepathlen - len(prefix) - len(suffix)
    if spaceleft > 0:
        return prefix + basename[:spaceleft] + suffix
    return prefix + suffix
269 271
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Every uppercase letter 'X' becomes '_x'. Reserved or illegal characters
    become '~xx', where xx is the two digit hex code of the character (see
    encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    direncoded = encodedir(path)
    segments = _encodefname(direncoded).split('/')
    default = '/'.join(_auxencode(segments, dotencode))
    if len(default) <= _maxstorepathlen:
        return default
    # too long for the reversible form: fall back to the hashed one
    return _hashencode(direncoded, dotencode)
307 309
def _pathencode(path):
    '''encode path for a dotencode store (pure Python implementation)

    Uses the reversible encoding when both the input and the encoded result
    fit within _maxstorepathlen, and the hashed encoding otherwise.
    '''
    direncoded = encodedir(path)
    # an over-long input is hashed without computing the reversible form
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(direncoded).split('/')
        default = '/'.join(_auxencode(segments, True))
        if len(default) <= _maxstorepathlen:
            return default
    return _hashencode(direncoded, True)
317 319
318 320 _pathencode = getattr(parsers, 'pathencode', _pathencode)
319 321
def _plainhybridencode(f):
    '''_hybridencode() with dotencode switched off'''
    return _hybridencode(f, dotencode=False)
322 324
def _calcmode(vfs):
    '''return the mode to create store files with, or None

    None is returned when vfs.stat() fails with OSError, or when the
    permission bits of the vfs root already equal what util.umask yields.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (0o777 & ~util.umask) == (0o777 & mode):
        return None
    return mode
333 335
# whitespace-separated names of the store entries listed by copylist()
_data = ('narrowspec data meta 00manifest.d 00manifest.i'
         ' 00changelog.d 00changelog.i phaseroots obsstore')

def isrevlog(f, kind, st):
    '''tell whether directory entry f is a regular file ending in .i or .d'''
    if kind != stat.S_IFREG:
        return False
    return f[-2:] in ('.i', '.d')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        # all regular access goes through the directory-encoding filter
        self.vfs = vfsmod.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse, filefilter=isrevlog):
        '''returns a sorted list of (unencoded, encoded, size)

        Only entries accepted by filefilter are listed. Subdirectories of
        relpath are visited only if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        # number of leading characters to strip to get a store-relative name
        striplen = len(self.path) + 1
        found = []
        if not self.rawvfs.isdir(root):
            return found
        readdir = self.rawvfs.readdir
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if filefilter(name, kind, st):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self, matcher=None):
        '''list the entries below 'data' and 'meta'; matcher is not used'''
        datalist = self._walk('data', True)
        metalist = self._walk('meta', True)
        return datalist + metalist

    def topfiles(self):
        '''iterate over the top-level store entries, in reverse sorted order'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self, matcher=None):
        '''yields (unencoded, encoded, size)

        if a matcher is passed, it is handed to datafiles(), which decides
        which storage files are reported
        '''
        # yield data files first
        for entry in self.datafiles(matcher):
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        '''return 'requires' followed by the names listed in _data'''
        return ['requires'] + _data.split()

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "/".join(("data", path))
        # file?
        if self.vfs.exists(base + ".i"):
            return True
        # dir?
        if not base.endswith("/"):
            base += "/"
        return self.vfs.exists(base)
417 419
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory, with names passed through
    encodefilename()'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = vfsmod.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self, matcher=None):
        '''yields (unencoded, encoded, size) for the data and meta files

        The unencoded name is None when the on-disk name cannot be decoded;
        such entries are always reported. Decodable entries are reported
        only if _matchtrackedpath() accepts them for matcher.
        '''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                decoded = decodefilename(name)
            except KeyError:
                decoded = None
            if decoded is None or _matchtrackedpath(decoded, matcher):
                yield decoded, encoded, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return the repository-relative names listed for copying'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
444 446
class fncache(object):
    '''set of store file names backed by the 'fncache' file

    entries is None until the file has been read; the first add(), remove(),
    membership test or iteration loads it. _dirty records whether entries
    has changed since it was loaded or last written.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) yields an empty set. Raises
        error.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # release the handle on the error paths as well
            fp.close()

    def write(self, tr):
        '''write the entries back to the fncache file if they have changed

        tr.addbackup() is called on 'fncache' before the file is replaced.
        '''
        if self._dirty:
            assert self.entries is not None
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''record fn, marking the cache dirty if it was not known yet'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''forget fn, marking the cache dirty if it was known'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
506 508
class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs):
    '''vfs wrapper that encodes paths and registers data/meta files opened
    for writing in the fncache'''
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        encoded = self.encode(path)
        writing = mode not in ('r', 'rb')
        if writing and path.startswith(('data/', 'meta/')):
            # do not trigger a fncache load when adding a file that already is
            # known to exist.
            skipadd = self.fncache.entries is None and self.vfs.exists(encoded)
            if skipadd and 'a' in mode and not self.vfs.stat(encoded).st_size:
                # when appending to an existing file, if the file has size zero,
                # it should be considered as missing. Such zero-size files are
                # the result of truncation when a transaction is aborted.
                skipadd = False
            if not skipadd:
                self.fncache.add(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        # an empty path is passed through without being encoded
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
534 536
class fncachestore(basicstore):
    '''store in a 'store' subdirectory whose data files are tracked in a
    fncache; names are encoded with _pathencode or _plainhybridencode'''
    def __init__(self, path, vfstype, dotencode):
        self.encode = encode = (_pathencode if dotencode
                                else _plainhybridencode)
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.fncache = fnc = fncache(rawvfs)
        self.vfs = _fncachevfs(rawvfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the already encoded path'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self, matcher=None):
        '''yields (unencoded, encoded, size) for the fncache entries

        Entries rejected by _matchtrackedpath() are skipped, and so are
        entries whose file does not exist (ENOENT).
        '''
        for name in sorted(self.fncache):
            if _matchtrackedpath(name, matcher):
                encoded = self.encode(name)
                try:
                    yield name, encoded, self.getsize(encoded)
                except OSError as err:
                    if err.errno != errno.ENOENT:
                        raise

    def copylist(self):
        '''return the repository-relative names listed for copying'''
        names = ('narrowspec data meta dh fncache phaseroots obsstore'
                 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in names.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        # entries are reloaded from disk on the next access
        self.fncache.entries = None

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        '''tell whether the file backing unencoded name f exists'''
        encoded = self.encode(f)
        try:
            self.getsize(encoded)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        else:
            return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "/".join(("data", path))
        # check for files (exact match)
        revlog = base + '.i'
        if revlog in self.fncache and self._exists(revlog):
            return True
        # now check for directories (prefix match)
        if not base.endswith('/'):
            base += '/'
        return any(entry.startswith(base) and self._exists(entry)
                   for entry in self.fncache)
General Comments 0
You need to be logged in to leave comments. Login now