##// END OF EJS Templates
store: give name to lowerencode function...
Yuya Nishihara -
r34210:96808804 default
parent child Browse files
Show More
@@ -1,575 +1,577 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 policy,
19 19 pycompat,
20 20 util,
21 21 vfs as vfsmod,
22 22 )
23 23
24 24 parsers = policy.importmod(r'parsers')
25 25
26 26 # This avoids a collision between a file named foo and a dir named
27 27 # foo.i or foo.d
28 28 def _encodedir(path):
29 29 '''
30 30 >>> _encodedir(b'data/foo.i')
31 31 'data/foo.i'
32 32 >>> _encodedir(b'data/foo.i/bla.i')
33 33 'data/foo.i.hg/bla.i'
34 34 >>> _encodedir(b'data/foo.i.hg/bla.i')
35 35 'data/foo.i.hg.hg/bla.i'
36 36 >>> _encodedir(b'data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
37 37 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
38 38 '''
39 39 return (path
40 40 .replace(".hg/", ".hg.hg/")
41 41 .replace(".i/", ".i.hg/")
42 42 .replace(".d/", ".d.hg/"))
43 43
44 44 encodedir = getattr(parsers, 'encodedir', _encodedir)
45 45
def decodedir(path):
    '''Strip the ".hg" suffix from directory components ending in
    .d.hg, .i.hg or .hg.hg.

    >>> decodedir(b'data/foo.i')
    'data/foo.i'
    >>> decodedir(b'data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir(b'data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # Every escaped form contains ".hg/", so one membership test is enough
    # to skip the substitutions for paths that hold nothing to decode.
    if ".hg/" in path:
        substitutions = (
            (".d.hg/", ".d/"),
            (".i.hg/", ".i/"),
            (".hg.hg/", ".hg/"),
        )
        for escaped, plain in substitutions:
            path = path.replace(escaped, plain)
    return path
61 61
62 62 def _reserved():
63 63 ''' characters that are problematic for filesystems
64 64
65 65 * ascii escapes (0..31)
66 66 * ascii hi (126..255)
67 67 * windows specials
68 68
69 69 these characters will be escaped by encodefunctions
70 70 '''
71 71 winreserved = [ord(x) for x in u'\\:*?"<>|']
72 72 for x in range(32):
73 73 yield x
74 74 for x in range(126, 256):
75 75 yield x
76 76 for x in winreserved:
77 77 yield x
78 78
def _buildencodefun():
    '''Build the reversible (encode, decode) pair of filename functions.

    Uppercase letters and the underscore become '_' followed by the
    lowercased character, characters produced by _reserved() become '~xx'
    (two hex digits), and every other character below 127 maps to itself.

    >>> enc, dec = _buildencodefun()

    >>> enc(b'nothing/special.txt')
    'nothing/special.txt'
    >>> dec(b'nothing/special.txt')
    'nothing/special.txt'

    >>> enc(b'HELLO')
    '_h_e_l_l_o'
    >>> dec(b'_h_e_l_l_o')
    'HELLO'

    >>> enc(b'hello:world?')
    'hello~3aworld~3f'
    >>> dec(b'hello~3aworld~3f')
    'hello:world?'

    >>> enc(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    >>> dec(b'the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    xchr = pycompat.bytechr

    # forward table: start from the identity over 0..126, then override
    cmap = {}
    for code in range(127):
        char = xchr(code)
        cmap[char] = char
    for code in _reserved():
        cmap[xchr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[xchr(code)] = escape + xchr(code).lower()

    # reverse table, keyed by the 1..3 character encoded tokens
    dmap = dict((token, char) for char, token in cmap.iteritems())

    def decodetokens(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first, exactly as many as 3 characters
            for width in xrange(1, 4):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # nothing at this position is a known token
                raise KeyError

    def encode(s):
        # slice instead of indexing so that each piece stays a string
        return ''.join([cmap[s[pos:pos + 1]] for pos in xrange(len(s))])

    def decode(s):
        return ''.join(list(decodetokens(s)))

    return encode, decode
131 131
132 132 _encodefname, _decodefname = _buildencodefun()
133 133
def encodefilename(s):
    '''Apply encodedir() and then the reversible filename encoding to s.

    >>> encodefilename(b'foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
140 140
def decodefilename(s):
    '''Undo encodefilename(): decode the characters, then the directories.

    >>> decodefilename(b'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
147 147
def _buildlowerencodefun():
    '''Build the pure-Python lowerencode() function.

    The returned function maps every character of its argument through a
    table: characters produced by _reserved() become '~xx' (two hex
    digits), 'A'..'Z' become their lowercase form, and all remaining
    characters below 127 are kept as they are.

    >>> f = _buildlowerencodefun()
    >>> f(b'nothing/special.txt')
    'nothing/special.txt'
    >>> f(b'HELLO')
    'hello'
    >>> f(b'hello:world?')
    'hello~3aworld~3f'
    >>> f(b'the\\x07quick\\xADshot')
    'the~07quick~adshot'
    '''
    # range() rather than xrange(): the table is tiny and range() exists
    # on every supported interpreter without relying on an alias.
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()

    # a named function instead of a lambda, so the result carries a name
    def lowerencode(s):
        return "".join([cmap[c] for c in s])
    return lowerencode
166 168
167 169 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
168 170
169 171 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
170 172 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
171 173 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
172 174 def _auxencode(path, dotencode):
173 175 '''
174 176 Encodes filenames containing names reserved by Windows or which end in
175 177 period or space. Does not touch other single reserved characters c.
176 178 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
177 179 Additionally encodes space or period at the beginning, if dotencode is
178 180 True. Parameter path is assumed to be all lowercase.
179 181 A segment only needs encoding if a reserved name appears as a
180 182 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
181 183 doesn't need encoding.
182 184
183 185 >>> s = b'.foo/aux.txt/txt.aux/con/prn/nul/foo.'
184 186 >>> _auxencode(s.split(b'/'), True)
185 187 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
186 188 >>> s = b'.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
187 189 >>> _auxencode(s.split(b'/'), False)
188 190 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
189 191 >>> _auxencode([b'foo. '], True)
190 192 ['foo.~20']
191 193 >>> _auxencode([b' .foo'], True)
192 194 ['~20.foo']
193 195 '''
194 196 for i, n in enumerate(path):
195 197 if not n:
196 198 continue
197 199 if dotencode and n[0] in '. ':
198 200 n = "~%02x" % ord(n[0:1]) + n[1:]
199 201 path[i] = n
200 202 else:
201 203 l = n.find('.')
202 204 if l == -1:
203 205 l = len(n)
204 206 if ((l == 3 and n[:3] in _winres3) or
205 207 (l == 4 and n[3:4] <= '9' and n[3:4] >= '1'
206 208 and n[:3] in _winres4)):
207 209 # encode third letter ('aux' -> 'au~78')
208 210 ec = "~%02x" % ord(n[2:3])
209 211 n = n[0:2] + ec + n[3:]
210 212 path[i] = n
211 213 if n[-1] in '. ':
212 214 # encode last period or space ('foo...' -> 'foo..~2e')
213 215 path[i] = n[:-1] + "~%02x" % ord(n[-1:])
214 216 return path
215 217
216 218 _maxstorepathlen = 120
217 219 _dirprefixlen = 8
218 220 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
219 221
def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name for path.

    The result has the shape 'dh/<short dirs>/<filler><sha1 hex><ext>'.
    The first five characters of path (the 'data/' or 'meta/' prefix) are
    left out of the directory part, while the digest covers the whole path.
    '''
    digest = hashlib.sha1(path).hexdigest()
    # skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split('/')
    segments = _auxencode(lowered, dotencode)
    basename = segments[-1]
    ext = os.path.splitext(basename)[1]

    # keep a truncated copy of each directory level for as long as the
    # joined result still fits into _maxshortdirslen
    shortdirs = []
    used = 0
    for segment in segments[:-1]:
        short = segment[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        needed = len(short)
        if used != 0:
            # account for the separator and what is already taken
            needed += used + 1
        if needed > _maxshortdirslen:
            break
        shortdirs.append(short)
        used = needed

    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'

    res = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # pad with the start of the basename up to the maximum length
        res = 'dh/' + dirs + basename[:spaceleft] + digest + ext
    return res
250 252
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Applies to all paths that begin with 'data/'. One of two encodings is
    chosen.

    Default encoding (reversible):

    Every uppercase letter 'X' becomes '_x'. Every reserved or illegal
    character becomes '~xx', where xx is the two digit hex code of the
    character (see encodefilename).
    Relevant path components that are Windows reserved filenames are
    masked by encoding their third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    Used instead when the default-encoded path is longer than
    _maxstorepathlen.
    It keeps up to _dirprefixlen characters of each directory level of
    the lowerencoded path, for as many levels as fit into
    _maxshortdirslen.
    After the directories comes the filler, then the sha digest of the
    full path. The filler is the beginning of the basename of the
    lowerencoded path (the basename is everything after the last path
    separator); it takes as many characters of the basename as possible
    until the encoded path reaches _maxstorepathlen characters (or the
    basename is used up).
    The extension (e.g. '.i' or '.d') is preserved.

    When the hashed encoding is used, the string 'data/' at the beginning
    is replaced with 'dh/'.
    '''
    direscaped = encodedir(path)
    segments = _encodefname(direscaped).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    # too long for the reversible form: hash the dir-escaped path instead
    return _hashencode(direscaped, dotencode)
288 290
def _pathencode(path):
    '''Encode path like _hybridencode() does with dotencode enabled.

    The hashed form is used as soon as either the raw path or its
    reversible encoding is longer than _maxstorepathlen.
    '''
    direscaped = encodedir(path)
    # the length of the *unescaped* path decides whether the reversible
    # encoding is attempted at all
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(direscaped).split('/')
        encoded = '/'.join(_auxencode(segments, True))
        if len(encoded) <= _maxstorepathlen:
            return encoded
    return _hashencode(direscaped, True)
298 300
299 301 _pathencode = getattr(parsers, 'pathencode', _pathencode)
300 302
def _plainhybridencode(f):
    '''Encode f with _hybridencode(), with dotencode switched off.'''
    return _hybridencode(f, dotencode=False)
303 305
def _calcmode(vfs):
    '''Return the mode to create files with, or None.

    The mode is taken from vfs.stat(). None is returned when the stat
    fails with OSError, or when the permission bits already match what
    the current umask would produce.
    '''
    mode = None
    try:
        # files in .hg/ will be created using this mode
        statmode = vfs.stat().st_mode
        # avoid some useless chmods: only report a mode that differs from
        # the umask default
        if (0o777 & ~util.umask) != (0o777 & statmode):
            mode = statmode
    except OSError:
        pass
    return mode
314 316
315 317 _data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
316 318 ' phaseroots obsstore')
317 319
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        mode = _calcmode(rawvfs)
        rawvfs.createmode = mode
        self.path = rawvfs.base
        self.createmode = mode
        self.rawvfs = rawvfs
        # all regular access goes through a vfs that applies encodedir
        self.vfs = vfsmod.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''Return the location of f below the store path.'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)'''
        root = self.path
        if relpath:
            root += '/' + relpath
        # number of leading characters to drop to make names store-relative
        striplen = len(self.path) + 1
        found = []
        if not self.rawvfs.isdir(root):
            return found
        readdir = self.rawvfs.readdir
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''Return the entries found below 'data' followed by 'meta'.'''
        data = self._walk('data', True)
        meta = self._walk('meta', True)
        return data + meta

    def topfiles(self):
        '''Return the entries directly in the store, in reverse order.'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for source in (self.datafiles, self.topfiles):
            for entry in source():
                yield entry

    def copylist(self):
        '''Return the list of names to copy: 'requires' plus the store
        entries.'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self, tr):
        '''Nothing to write for this store type.'''
        pass

    def invalidatecaches(self):
        '''Nothing is cached by this store type.'''
        pass

    def markremoved(self, fn):
        '''Nothing to record for this store type.'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "/".join(("data", path))
        # file?
        if self.vfs.exists(base + ".i"):
            return True
        # dir?
        if base.endswith("/"):
            dirpath = base
        else:
            dirpath = base + "/"
        return self.vfs.exists(dirpath)
391 393
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory with encoded filenames'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        mode = _calcmode(rawvfs)
        rawvfs.createmode = mode
        self.path = rawvfs.base
        self.createmode = mode
        self.rawvfs = rawvfs
        # all regular access goes through a vfs applying encodefilename
        self.vfs = vfsmod.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size); unencoded is None when the
        name cannot be decoded'''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                decoded = decodefilename(name)
            except KeyError:
                # not something the filename decoder understands
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''Return the location of f below the store path.'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''Return the list of names to copy, store entries prefixed with
        'store/'.'''
        names = ['requires', '00changelog.i']
        for entry in _data.split():
            names.append('store/' + entry)
        return names
416 418
class fncache(object):
    '''Set of store file names, backed by the 'fncache' file.

    The entries are loaded lazily on first use and written back by
    write() only when they were modified.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # None means "not loaded yet"; a set once _load() has run
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) gives an empty set.
        Raises error.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # an empty entry is invalid; read the file again line by
                # line to report where it is
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # close the handle on every path, including the abort above
            fp.close()

    def write(self, tr):
        '''Write the entries back to the fncache file if they changed.

        tr is asked to back up 'fncache' before the file is rewritten.
        '''
        if self._dirty:
            tr.addbackup('fncache')
            # NOTE(review): close() is left outside a try/finally on
            # purpose - presumably closing an atomictemp file is what
            # publishes it, so a failed write must not reach it; confirm
            # against the vfs implementation.
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''Add fn to the entries, marking the cache dirty if it is new.'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''Remove fn from the entries; unknown names are ignored.'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
477 479
class _fncachevfs(vfsmod.abstractvfs, vfsmod.proxyvfs):
    '''vfs proxy that encodes paths and records written store files in
    the fncache'''
    def __init__(self, vfs, fnc, encode):
        vfsmod.proxyvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        '''Open the encoded form of path on the wrapped vfs.

        Files below 'data/' or 'meta/' that are opened in any mode other
        than 'r' or 'rb' are added to the fncache first.
        '''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith(('data/', 'meta/')):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.vfs(encoded, mode, *args, **kw)

    def join(self, path):
        '''Join the encoded path on the wrapped vfs; an empty path is
        passed through unencoded.'''
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
495 497
class fncachestore(basicstore):
    '''store that tracks its data files in an fncache'''
    def __init__(self, path, vfstype, dotencode):
        # dotencode selects which path encoding function is used
        encode = _pathencode if dotencode else _plainhybridencode
        self.encode = encode
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        mode = _calcmode(rawvfs)
        self.createmode = mode
        rawvfs.createmode = mode
        self.rawvfs = rawvfs
        cache = fncache(rawvfs)
        self.fncache = cache
        self.vfs = _fncachevfs(rawvfs, cache, encode)
        self.opener = self.vfs

    def join(self, f):
        '''Return the location of f below the store path.'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''Return the size of the (already encoded) path.'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries that
        exist, in sorted order'''
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                size = self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                # listed in the fncache but missing on disk: skip it
                continue
            yield name, encoded, size

    def copylist(self):
        '''Return the list of names to copy, store entries prefixed with
        'store/'.'''
        d = ('data meta dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        names = ['requires', '00changelog.i']
        for entry in d.split():
            names.append('store/' + entry)
        return names

    def write(self, tr):
        '''Write the fncache as part of tr.'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''Forget the loaded fncache entries.'''
        self.fncache.entries = None

    def markremoved(self, fn):
        '''Drop fn from the fncache.'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''Return True if the encoded form of f can be stat'ed.'''
        encoded = self.encode(f)
        try:
            self.getsize(encoded)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "/".join(("data", path))
        # check for files (exact match)
        index = base + '.i'
        if index in self.fncache and self._exists(index):
            return True
        # now check for directories (prefix match)
        if base.endswith('/'):
            prefix = base
        else:
            prefix = base + '/'
        return any(entry.startswith(prefix) and self._exists(entry)
                   for entry in self.fncache)
569 571
def store(requirements, path, vfstype):
    '''Create the store object matching the given requirements.

    Without 'store' a basicstore is returned; with 'store' but without
    'fncache' an encodedstore; otherwise a fncachestore, with dotencode
    enabled when 'dotencode' is among the requirements.
    '''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now