##// END OF EJS Templates
store: fix many single-byte ops to use slicing in _auxencode
Augie Fackler -
r31362:50cd8134 default
parent child Browse files
Show More
@@ -1,573 +1,573 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 parsers,
19 19 pycompat,
20 20 util,
21 21 vfs as vfsmod,
22 22 )
23 23
# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def _encodedir(path):
    '''Escape directory components ending in ".hg", ".i" or ".d".

    Every occurrence of ".hg/", ".i/" or ".d/" in path gets ".hg" inserted
    before the slash. The ".hg/" replacement runs first, so a directory
    that already ends in ".hg" is escaped as well and the result can be
    undone by decodedir().

    path may hold several newline-separated paths; each one is processed.

    >>> _encodedir('data/foo.i')
    'data/foo.i'
    >>> _encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    '''
    return (path
            .replace(".hg/", ".hg.hg/")
            .replace(".i/", ".i.hg/")
            .replace(".d/", ".d.hg/"))
41 41
# Use the encodedir exported by the parsers module when it provides one;
# otherwise fall back to the pure-Python _encodedir defined in this file.
encodedir = getattr(parsers, 'encodedir', _encodedir)
43 43
def decodedir(path):
    '''Undo the directory escaping applied by _encodedir().

    Returns path unchanged when it contains no ".hg/" at all. Otherwise
    ".d.hg/", ".i.hg/" and ".hg.hg/" are collapsed back to ".d/", ".i/"
    and ".hg/", in that order.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" not in path:
        # nothing was escaped, skip the three replace passes
        return path
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))
59 59
def _reserved():
    ''' characters that are problematic for filesystems

    * ascii escapes (0..31)
    * ascii hi (126..255)
    * windows specials

    these characters will be escaped by encodefunctions

    Generator yielding the integer code of each such character: first
    0..31, then 126..255, then the codes of the characters in '\\:*?"<>|'.
    '''
    winreserved = [ord(x) for x in u'\\:*?"<>|']
    for x in range(32):
        yield x
    for x in range(126, 256):
        yield x
    for x in winreserved:
        yield x
76 76
def _buildencodefun():
    '''Build the reversible filename encoder and its decoder.

    Returns a pair (encode, decode) of one-argument functions.

    encode maps each character of its argument through a table in which

    * every code yielded by _reserved() becomes '~' followed by its
      two-digit lowercase hex value,
    * 'A'..'Z' and '_' become '_' followed by the lowercased character,
    * every remaining character below 127 maps to itself.

    decode inverts that table. At each position it tries the 1-, 2- and
    3-character slices in turn and raises KeyError when none of them is a
    known encoded token.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    xchr = pycompat.bytechr
    asciistr = list(map(xchr, range(127)))
    capitals = list(range(ord("A"), ord("Z") + 1))

    # start from the identity mapping, then override the special cases
    cmap = dict((x, x) for x in asciistr)
    for x in _reserved():
        cmap[xchr(x)] = "~%02x" % x
    for x in capitals + [ord(e)]:
        cmap[xchr(x)] = e + xchr(x).lower()

    # reverse table: encoded token -> original character
    dmap = {}
    for k, v in cmap.iteritems():
        dmap[v] = k

    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1 ('a'), 2 ('_a') or 3 ('~3a') chars long
            for l in xrange(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    # s[c:c + 1] rather than s[c] so the lookup key is a one-element slice
    return (lambda s: ''.join([cmap[s[c:c + 1]] for c in xrange(len(s))]),
            lambda s: ''.join(decode(s)))
129 129
# Build the encode/decode tables once, when the module is loaded.
_encodefname, _decodefname = _buildencodefun()
131 131
def encodefilename(s):
    '''Apply encodedir() to s, then the character-level _encodefname().

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    return _encodefname(encodedir(s))
138 138
def decodefilename(s):
    '''Reverse encodefilename(): _decodefname() first, then decodedir().

    Propagates the KeyError raised by _decodefname() when s contains
    something that is not a valid encoded token.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    return decodedir(_decodefname(s))
145 145
def _buildlowerencodefun():
    '''Build the lossy, lowercasing filename encoder.

    Returns a one-argument function that maps each character of its input
    through a table in which codes yielded by _reserved() become '~' plus
    their two-digit lowercase hex value, 'A'..'Z' become the matching
    lowercase letter, and every other character below 127 maps to itself.
    No decoder is built for this mapping.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])
164 164
# Use parsers.lowerencode when the attribute exists and is truthy; otherwise
# build the pure-Python version.
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
166 166
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = ('aux', 'con', 'prn', 'nul') # length 3
_winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Encodes filenames containing names reserved by Windows or which end in
    period or space. Does not touch other single reserved characters c.
    Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    Additionally encodes space or period at the beginning, if dotencode is
    True. Parameter path is assumed to be all lowercase.
    A segment only needs encoding if a reserved name appears as a
    basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
    doesn't need encoding.

    path is a list of path segments. It is modified in place and the same
    list is returned. Empty segments are left untouched.

    >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split('/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split('/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode(['foo. '], True)
    ['foo.~20']
    >>> _auxencode([' .foo'], True)
    ['~20.foo']
    '''
    for i, n in enumerate(path):
        if not n:
            continue
        # Single characters are taken with slices (n[0:1], not n[0]) so that
        # ord() always receives a length-1 sequence.
        if dotencode and n[0] in '. ':
            n = "~%02x" % ord(n[0:1]) + n[1:]
            path[i] = n
        else:
            # l is the length of the part before the first '.', or of the
            # whole segment when it has no '.'
            l = n.find('.')
            if l == -1:
                l = len(n)
            if ((l == 3 and n[:3] in _winres3) or
                (l == 4 and '1' <= n[3:4] <= '9'
                 and n[:3] in _winres4)):
                # encode third letter ('aux' -> 'au~78')
                ec = "~%02x" % ord(n[2:3])
                n = n[0:2] + ec + n[3:]
                path[i] = n
        if n[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            path[i] = n[:-1] + "~%02x" % ord(n[-1:])
    return path
213 213
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name for path.

    The result is 'dh/' + shortened directories + filler + sha1 hex digest
    of path + extension of the basename. Directories are the lowerencoded,
    _auxencode()d path segments, each cut to _dirprefixlen characters, taken
    for as long as their joined length fits in _maxshortdirslen. The filler
    is the start of the basename, as much as fits in _maxstorepathlen.

    The first five characters of path are dropped before encoding, so path
    is expected to start with a five-character prefix such as 'data/'.
    dotencode is passed through to _auxencode().
    '''
    digest = hashlib.sha1(path).hexdigest()
    le = lowerencode(path[5:]).split('/') # skips prefix 'data/' or 'meta/'
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in '. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + '_'
        if sdirslen == 0:
            # the first directory is always kept
            t = len(d)
        else:
            # +1 accounts for the '/' joining it to the previous dirs
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = '/'.join(sdirs)
    if len(dirs) > 0:
        dirs += '/'
    res = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = 'dh/' + dirs + filler + digest + ext
    return res
248 248
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    ef = _encodefname(path).split('/')
    res = '/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the reversible form, fall back to the hashed one
        res = _hashencode(path, dotencode)
    return res
286 286
def _pathencode(path):
    '''Encode path for the store with dotencode enabled.

    Follows the same steps as _hybridencode(path, True), with one extra
    early exit: when the unencoded path is already longer than
    _maxstorepathlen, the hashed form is returned without building the
    reversible encoding first.
    '''
    de = encodedir(path)
    if len(path) > _maxstorepathlen:
        return _hashencode(de, True)
    ef = _encodefname(de).split('/')
    res = '/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return _hashencode(de, True)
    return res
296 296
# Use the pathencode exported by the parsers module when it provides one;
# otherwise keep the pure-Python _pathencode defined above.
_pathencode = getattr(parsers, 'pathencode', _pathencode)
298 298
def _plainhybridencode(f):
    '''Encode f with _hybridencode(), with dotencode switched off.'''
    return _hybridencode(f, False)
301 301
def _calcmode(vfs):
    '''Return the mode to create files with, or None if none is needed.

    The mode is the st_mode reported by vfs.stat(). None is returned instead
    when its permission bits already equal what util.umask would produce,
    or when vfs.stat() raises OSError.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        # avoid some useless chmods
        if (0o777 & ~util.umask) == (0o777 & mode):
            mode = None
    except OSError:
        mode = None
    return mode
312 312
_data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        '''Open a store at path, using vfstype(path) for file access.'''
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs takes on-disk names; vfs runs names through encodedir first
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''Return the store path joined with the encodedir()-encoded f.'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''Return a sorted list of (unencoded, encoded, size) tuples.

        Lists regular files whose names end in '.d' or '.i' under relpath
        (relative to the store path), descending into subdirectories only
        when recurse is true. Returns an empty list when that path is not
        a directory.
        '''
        path = self.path
        if relpath:
            path += '/' + relpath
        # length of the store path plus its trailing '/'
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            readdir = self.rawvfs.readdir
            while visit:
                p = visit.pop()
                for f, kind, st in readdir(p, stat=True):
                    fp = p + '/' + f
                    if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        l.sort()
        return l

    def datafiles(self):
        '''Return the entries found under 'data', then those under 'meta'.'''
        return self._walk('data', True) + self._walk('meta', True)

    def topfiles(self):
        '''Return the store's top-level entries in reverse sorted order.'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        for x in self.topfiles():
            yield x

    def copylist(self):
        '''Return 'requires' followed by the names listed in _data.'''
        return ['requires'] + _data.split()

    def write(self, tr):
        '''Do nothing; provided so subclasses can override it.'''
        pass

    def invalidatecaches(self):
        '''Do nothing; provided so subclasses can override it.'''
        pass

    def markremoved(self, fn):
        '''Do nothing; provided so subclasses can override it.'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        if not path.endswith("/"):
            path = path + "/"
        return self.vfs.exists(path)
389 389
class encodedstore(basicstore):
    '''store kept under path + '/store', with names run through
    encodefilename()'''
    def __init__(self, path, vfstype):
        '''Open the store in the 'store' subdirectory of path.'''
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs takes on-disk names; vfs applies encodefilename first
        self.rawvfs = vfs
        self.vfs = vfsmod.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''Yield (unencoded, encoded, size) for each data file.

        unencoded is None for an on-disk name that decodefilename() cannot
        decode.
        '''
        for a, b, size in super(encodedstore, self).datafiles():
            try:
                a = decodefilename(a)
            except KeyError:
                a = None
            yield a, b, size

    def join(self, f):
        '''Return the store path joined with the encoded form of f.'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''Return 'requires', '00changelog.i' and the names listed in
        _data, each of the latter prefixed with 'store/'.'''
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in _data.split()])
414 414
class fncache(object):
    '''set of store filenames, loaded lazily from the 'fncache' file

    entries is None until the file has been loaded; _dirty records whether
    the set has been changed since it was last loaded or written.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        Raises error.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the bad entry is
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # close the file on the abort path as well
            fp.close()

    def write(self, tr):
        '''Write the entries back to the 'fncache' file if they changed.

        The existing file is registered with tr.addbackup() first. Nothing
        is done when the set is not dirty.
        '''
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''Add fn, loading the file first if needed; marks the set dirty
        only when fn was not already present.'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''Remove fn, loading the file first if needed; a missing fn is
        ignored and leaves the dirty flag unchanged.'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
475 475
class _fncachevfs(vfsmod.abstractvfs, vfsmod.auditvfs):
    '''vfs wrapper that encodes paths and records written store files

    Paths are passed through the encode function before reaching the
    wrapped vfs. Paths under 'data/' or 'meta/' that are opened in any mode
    other than 'r' or 'rb' are added, unencoded, to the fncache.
    '''
    def __init__(self, vfs, fnc, encode):
        vfsmod.auditvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        if mode not in ('r', 'rb') and path.startswith(('data/', 'meta/')):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        '''Join path onto the wrapped vfs; a false path is passed through
        without being encoded.'''
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)
493 493
class fncachestore(basicstore):
    '''store kept under path + '/store' that tracks its files in an fncache

    Names are encoded with _pathencode when dotencode is true, and with
    _plainhybridencode otherwise.
    '''
    def __init__(self, path, vfstype, dotencode):
        if dotencode:
            encode = _pathencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs takes on-disk names; self.vfs encodes and feeds the fncache
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        '''Return the store path joined with the encoded form of f.'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''Return st_size of path, an already-encoded name, via rawvfs.'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''Yield (unencoded, encoded, size) for each fncache entry, in
        sorted order; entries whose file does not exist are skipped.'''
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                yield f, ef, self.getsize(ef)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    def copylist(self):
        '''Return 'requires', '00changelog.i' and the store-relative names
        used by this store type, each of the latter prefixed with
        'store/'.'''
        d = ('data meta dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self, tr):
        '''Write the fncache, passing the transaction tr through.'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''Drop the loaded fncache entries so they are re-read on next
        use.'''
        self.fncache.entries = None

    def markremoved(self, fn):
        '''Remove fn from the fncache.'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''Return True if the encoded form of f can be stat()ed, False if
        that fails with ENOENT; any other OSError is re-raised.'''
        ef = self.encode(f)
        try:
            self.getsize(ef)
            return True
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # check for files (exact match)
        e = path + '.i'
        if e in self.fncache and self._exists(e):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        for e in self.fncache:
            if e.startswith(path) and self._exists(e):
                return True
        return False
567 567
def store(requirements, path, vfstype):
    '''Return the store object matching the given requirements.

    * 'store' and 'fncache' present: fncachestore, with dotencode enabled
      when 'dotencode' is also present
    * 'store' present without 'fncache': encodedstore
    * otherwise: basicstore
    '''
    if 'store' in requirements:
        if 'fncache' in requirements:
            return fncachestore(path, vfstype, 'dotencode' in requirements)
        return encodedstore(path, vfstype)
    return basicstore(path, vfstype)
General Comments 0
You need to be logged in to leave comments. Login now