##// END OF EJS Templates
store: use hashlib.sha1 directly instead of through util...
Augie Fackler -
r29338:81c38cb9 default
parent child Browse files
Show More
@@ -1,568 +1,567
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 import hashlib
11 12 import os
12 13 import stat
13 14
14 15 from .i18n import _
15 16 from . import (
16 17 error,
17 18 parsers,
18 19 scmutil,
19 20 util,
20 21 )
21 22
22 _sha = util.sha1
23
24 23 # This avoids a collision between a file named foo and a dir named
25 24 # foo.i or foo.d
26 25 def _encodedir(path):
27 26 '''
28 27 >>> _encodedir('data/foo.i')
29 28 'data/foo.i'
30 29 >>> _encodedir('data/foo.i/bla.i')
31 30 'data/foo.i.hg/bla.i'
32 31 >>> _encodedir('data/foo.i.hg/bla.i')
33 32 'data/foo.i.hg.hg/bla.i'
34 33 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
35 34 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
36 35 '''
37 36 return (path
38 37 .replace(".hg/", ".hg.hg/")
39 38 .replace(".i/", ".i.hg/")
40 39 .replace(".d/", ".d.hg/"))
41 40
# use the parsers module's encodedir when it provides one; otherwise
# fall back to the pure Python _encodedir
encodedir = getattr(parsers, 'encodedir', _encodedir)
43 42
def decodedir(path):
    '''Strip the '.hg' suffix from escaped directory names in path.

    Paths that do not contain '.hg/' are returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # '.hg.hg/' is handled last so a freshly produced '.i/' or '.d/'
        # is never confused with an escaped '.hg' directory
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
59 58
60 59 def _reserved():
61 60 ''' characters that are problematic for filesystems
62 61
63 62 * ascii escapes (0..31)
64 63 * ascii hi (126..255)
65 64 * windows specials
66 65
67 66 these characters will be escaped by encodefunctions
68 67 '''
69 68 winreserved = [ord(x) for x in '\\:*?"<>|']
70 69 for x in range(32):
71 70 yield x
72 71 for x in range(126, 256):
73 72 yield x
74 73 for x in winreserved:
75 74 yield x
76 75
def _buildencodefun():
    '''Build the reversible filename encoder and its decoder.

    Returns an (encode, decode) pair of functions. encode maps every
    character through a table in which reserved characters (see
    _reserved) become '~xx' (two hex digits), uppercase letters 'X'
    become '_x', and '_' becomes '__'. decode reverses that mapping and
    raises KeyError on input that is not a valid encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    # start from the identity mapping; reserved characters, uppercase
    # letters and the escape character itself are overridden below.
    # range()/items() are used instead of xrange()/iteritems() so the
    # code does not depend on Python-2-only names.
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    dmap = dict((v, k) for k, v in cmap.items())
    def decode(s):
        i = 0
        while i < len(s):
            # an encoded token is 1 ('a'), 2 ('_a') or 3 ('~3a') chars
            # long; try the shortest match first
            for l in range(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no token of any length matches at position i
                raise KeyError
    return (lambda s: ''.join([cmap[c] for c in s]),
            lambda s: ''.join(list(decode(s))))
124 123
# encoder/decoder pair built once, when the module is imported
_encodefname, _decodefname = _buildencodefun()
126 125
def encodefilename(s):
    '''Escape directory names in s, then encode its characters.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
133 132
def decodefilename(s):
    '''Decode the characters of s, then unescape its directory names.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
140 139
def _buildlowerencodefun():
    '''Build the (non-reversible) lowercasing filename encoder.

    Returns a function that maps reserved characters (see _reserved) to
    '~xx' (two hex digits) and uppercase ASCII letters to their
    lowercase form, leaving every other character alone.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    # range() instead of xrange() so the code does not depend on a
    # Python-2-only name
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])
159 158
# use parsers.lowerencode when the parsers module has a usable (truthy)
# one; otherwise build the pure Python encoder
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
161 160
162 161 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
163 162 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
164 163 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
165 164 def _auxencode(path, dotencode):
166 165 '''
167 166 Encodes filenames containing names reserved by Windows or which end in
168 167 period or space. Does not touch other single reserved characters c.
169 168 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
170 169 Additionally encodes space or period at the beginning, if dotencode is
171 170 True. Parameter path is assumed to be all lowercase.
172 171 A segment only needs encoding if a reserved name appears as a
173 172 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
174 173 doesn't need encoding.
175 174
176 175 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
177 176 >>> _auxencode(s.split('/'), True)
178 177 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
179 178 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
180 179 >>> _auxencode(s.split('/'), False)
181 180 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
182 181 >>> _auxencode(['foo. '], True)
183 182 ['foo.~20']
184 183 >>> _auxencode([' .foo'], True)
185 184 ['~20.foo']
186 185 '''
187 186 for i, n in enumerate(path):
188 187 if not n:
189 188 continue
190 189 if dotencode and n[0] in '. ':
191 190 n = "~%02x" % ord(n[0]) + n[1:]
192 191 path[i] = n
193 192 else:
194 193 l = n.find('.')
195 194 if l == -1:
196 195 l = len(n)
197 196 if ((l == 3 and n[:3] in _winres3) or
198 197 (l == 4 and n[3] <= '9' and n[3] >= '1'
199 198 and n[:3] in _winres4)):
200 199 # encode third letter ('aux' -> 'au~78')
201 200 ec = "~%02x" % ord(n[2])
202 201 n = n[0:2] + ec + n[3:]
203 202 path[i] = n
204 203 if n[-1] in '. ':
205 204 # encode last period or space ('foo...' -> 'foo..~2e')
206 205 path[i] = n[:-1] + "~%02x" % ord(n[-1])
207 206 return path
208 207
# encoded paths longer than this are replaced by the hashed encoding
_maxstorepathlen = 120
# number of leading characters kept from each directory level by the
# hashed encoding
_dirprefixlen = 8
# upper bound for the combined length of the shortened directory levels
# (including the '/' separators between them) in a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
212 211
def _hashencode(path, dotencode):
    '''Return the non-reversible hashed encoding of path.

    The result is 'dh/' + shortened directory levels + filler + sha1
    hex digest of path + the extension of the basename. dotencode is
    passed through to _auxencode.
    '''
    digest = hashlib.sha1(path).hexdigest()
    # path[5:] skips prefix 'data/' or 'meta/'
    parts = _auxencode(lowerencode(path[5:]).split('/'), dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    total = 0
    for comp in parts[:-1]:
        short = comp[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        # length of the joined directories if this one were added
        newtotal = len(short) if total == 0 else total + 1 + len(short)
        if newtotal > _maxshortdirslen:
            break
        shortdirs.append(short)
        total = newtotal
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    prefix = 'dh/' + dirs
    tail = digest + ext
    # use whatever room is left for the beginning of the basename
    spaceleft = _maxstorepathlen - len(prefix) - len(tail)
    if spaceleft > 0:
        return prefix + basename[:spaceleft] + tail
    return prefix + tail
243 242
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Uppercase letters 'X' become '_x' and reserved or illegal characters
    become '~xx', xx being the two digit hex code of the character (see
    encodefilename). Path components that are Windows reserved filenames
    are masked by encoding their third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    Used instead whenever the default-encoded path is longer than
    _maxstorepathlen. It keeps up to _dirprefixlen characters of each
    directory level of the lowerencoded path, for as many levels as fit
    into _maxshortdirslen, followed by the filler and the sha digest of
    the full path. The filler is the beginning of the basename of the
    lowerencoded path (everything after the last path separator), taking
    as many characters as possible until the encoded path reaches
    _maxstorepathlen characters or the basename is used up.
    The extension (e.g. '.i' or '.d') is preserved.

    When the hashed encoding is used, the leading 'data/' is replaced
    with 'dh/'.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    default = '/'.join(_auxencode(segments, dotencode))
    if len(default) <= _maxstorepathlen:
        return default
    return _hashencode(path, dotencode)
281 280
def _pathencode(path):
    '''Hybrid-encode path with dotencode enabled.

    Falls back to the hashed encoding when either the unencoded path or
    its default encoding is longer than _maxstorepathlen.
    '''
    de = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(de).split('/')
        res = '/'.join(_auxencode(segments, True))
        if len(res) <= _maxstorepathlen:
            return res
    return _hashencode(de, True)
291 290
# use parsers.pathencode when the parsers module provides it; otherwise
# keep the pure Python _pathencode
_pathencode = getattr(parsers, 'pathencode', _pathencode)
293 292
def _plainhybridencode(f):
    '''Hybrid-encode f with dotencode disabled.'''
    return _hybridencode(f, dotencode=False)
296 295
297 296 def _calcmode(vfs):
298 297 try:
299 298 # files in .hg/ will be created using this mode
300 299 mode = vfs.stat().st_mode
301 300 # avoid some useless chmods
302 301 if (0o777 & ~util.umask) == (0o777 & mode):
303 302 mode = None
304 303 except OSError:
305 304 mode = None
306 305 return mode
307 306
# space-separated names of the store entries that the copylist() methods
# of basicstore and encodedstore build their result from
_data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')
310 309
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        # names go through encodedir before reaching the raw vfs
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the path of f inside the store directory'''
        return '/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) for the
        regular '.d' and '.i' files below relpath'''
        root = self.path
        if relpath:
            root += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if not self.rawvfs.isdir(root):
            return found
        readdir = self.rawvfs.readdir
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    # name relative to the store root
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''entries found below 'data' followed by those below 'meta' '''
        data = self._walk('data', True)
        meta = self._walk('meta', True)
        return data + meta

    def topfiles(self):
        '''entries found directly in the store directory'''
        # yield manifest before changelog
        toplevel = self._walk('', False)
        return reversed(toplevel)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self, tr):
        pass

    def invalidatecaches(self):
        pass

    def markremoved(self, fn):
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "data/" + path
        exists = self.vfs.exists
        # file?
        if exists(base + ".i"):
            return True
        # dir?
        dirpath = base if base.endswith("/") else base + "/"
        return exists(dirpath)
384 383
class encodedstore(basicstore):
    '''store located in path + '/store' whose file names go through
    encodefilename'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size); unencoded is None for names
        that cannot be decoded'''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        names = ['requires', '00changelog.i']
        names.extend('store/' + f for f in _data.split())
        return names
409 408
class fncache(object):
    '''Set of store file names, loaded lazily from the 'fncache' file.

    entries is None until the file has been read; _dirty records whether
    the in-memory set differs from what was loaded.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        Raises error.Abort when the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # rescan the file only to report the offending line number
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # release the handle even when reading fails or we abort
            fp.close()

    def write(self, tr):
        '''write the entries back through transaction tr, if they changed'''
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''add fn, marking the cache dirty if it was not present'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''remove fn if present, marking the cache dirty in that case'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
470 469
class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
    '''vfs wrapper that encodes paths and records written store files
    in an fncache'''
    def __init__(self, vfs, fnc, encode):
        scmutil.auditvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        # anything but a plain read of a data/ or meta/ file gets recorded
        writing = mode not in ('r', 'rb')
        if writing and path.startswith(('data/', 'meta/')):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        # an empty path is passed through unencoded
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
488 487
class fncachestore(basicstore):
    '''store located in path + '/store' that tracks its data files in an
    fncache and encodes names with a length-limited encoding'''
    def __init__(self, path, vfstype, dotencode):
        self.encode = _pathencode if dotencode else _plainhybridencode
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.fncache = fncache(rawvfs)
        self.vfs = _fncachevfs(rawvfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''size of the (already encoded) path as reported by the raw vfs'''
        st = self.rawvfs.stat(path)
        return st.st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for every fncache entry that
        exists; entries whose file is missing are skipped'''
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                yield name, encoded, self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    def copylist(self):
        entries = ('data meta dh fncache phaseroots obsstore'
                   ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        names = ['requires', '00changelog.i']
        names.extend('store/' + f for f in entries.split())
        return names

    def write(self, tr):
        self.fncache.write(tr)

    def invalidatecaches(self):
        # forces the fncache to be reloaded on next use
        self.fncache.entries = None

    def markremoved(self, fn):
        self.fncache.remove(fn)

    def _exists(self, f):
        '''True if the file for unencoded name f can be stat'ed'''
        encoded = self.encode(f)
        try:
            self.getsize(encoded)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        base = "data/" + path
        # check for files (exact match)
        index = base + '.i'
        if index in self.fncache and self._exists(index):
            return True
        # now check for directories (prefix match)
        prefix = base if base.endswith('/') else base + '/'
        return any(e.startswith(prefix) and self._exists(e)
                   for e in self.fncache)
562 561
def store(requirements, path, vfstype):
    '''Create the store object matching the repository requirements.

    Without 'store' a basicstore is returned; with 'store' but without
    'fncache' an encodedstore; otherwise a fncachestore, with dotencode
    enabled when 'dotencode' is among the requirements.
    '''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now