##// END OF EJS Templates
store: migrate to util.iterfile
Jun Wu -
r30398:b63bef41 default
parent child Browse files
Show More
@@ -1,577 +1,577
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 parsers,
19 19 pycompat,
20 20 scmutil,
21 21 util,
22 22 )
23 23
24 24 # This avoids a collision between a file named foo and a dir named
25 25 # foo.i or foo.d
26 26 def _encodedir(path):
27 27 '''
28 28 >>> _encodedir('data/foo.i')
29 29 'data/foo.i'
30 30 >>> _encodedir('data/foo.i/bla.i')
31 31 'data/foo.i.hg/bla.i'
32 32 >>> _encodedir('data/foo.i.hg/bla.i')
33 33 'data/foo.i.hg.hg/bla.i'
34 34 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
35 35 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
36 36 '''
37 37 return (path
38 38 .replace(".hg/", ".hg.hg/")
39 39 .replace(".i/", ".i.hg/")
40 40 .replace(".d/", ".d.hg/"))
41 41
# Use the `encodedir` attribute of the parsers module when it has one;
# otherwise fall back to the pure-Python _encodedir defined in this module.
encodedir = getattr(parsers, 'encodedir', _encodedir)
43 43
def decodedir(path):
    '''Reverse the directory escaping performed by _encodedir.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # Substitutions run in the opposite order of the encoder:
        # ".hg.hg/" must be collapsed last.
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    # paths without any ".hg/" component are returned untouched
    return path
59 59
60 60 def _reserved():
61 61 ''' characters that are problematic for filesystems
62 62
63 63 * ascii escapes (0..31)
64 64 * ascii hi (126..255)
65 65 * windows specials
66 66
67 67 these characters will be escaped by encodefunctions
68 68 '''
69 69 winreserved = [ord(x) for x in u'\\:*?"<>|']
70 70 for x in range(32):
71 71 yield x
72 72 for x in range(126, 256):
73 73 yield x
74 74 for x in winreserved:
75 75 yield x
76 76
def _buildencodefun():
    '''Build and return the (encode, decode) pair for file names.

    Reserved characters (see _reserved) become "~xx" (two hex digits),
    uppercase letters and the underscore become "_" followed by the
    lowercased character, everything else below 127 maps to itself.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    if pycompat.ispy3:
        xchr = lambda x: bytes([x])
        asciistr = bytes(xrange(127))
    else:
        xchr = chr
        asciistr = map(chr, xrange(127))

    # start from the identity mapping, then override the special cases
    cmap = dict((c, c) for c in asciistr)
    for code in _reserved():
        cmap[xchr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[xchr(code)] = escape + xchr(code).lower()

    # inverse table: encoded token (1 to 3 characters) -> original character
    dmap = dict((token, plain) for plain, token in cmap.iteritems())

    def decode(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first: 1, then 2, then 3 characters
            for width in xrange(1, 4):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # nothing at this position is a known token
                raise KeyError

    def encodefn(s):
        return ''.join([cmap[c] for c in s])

    def decodefn(s):
        return ''.join(list(decode(s)))

    return encodefn, decodefn
133 133
# Build the encode/decode pair once, when the module is loaded.
_encodefname, _decodefname = _buildencodefun()
135 135
def encodefilename(s):
    '''Apply directory escaping (encodedir), then character encoding.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
142 142
def decodefilename(s):
    '''Undo encodefilename: decode characters, then unescape directories.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
149 149
def _buildlowerencodefun():
    '''Build the one-way "lower" encoder.

    Reserved characters (see _reserved) become "~xx"; uppercase letters
    are simply lowercased, so the result cannot be decoded.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    # identity for everything below 127, overridden below
    table = dict((chr(code), chr(code)) for code in xrange(127))
    for code in _reserved():
        table[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def lowerenc(s):
        return "".join([table[c] for c in s])

    return lowerenc
168 168
# Use parsers.lowerencode when the attribute exists and is truthy; otherwise
# build the pure-Python encoder defined in this module.
lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
170 170
171 171 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
172 172 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
173 173 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
174 174 def _auxencode(path, dotencode):
175 175 '''
176 176 Encodes filenames containing names reserved by Windows or which end in
177 177 period or space. Does not touch other single reserved characters c.
178 178 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
179 179 Additionally encodes space or period at the beginning, if dotencode is
180 180 True. Parameter path is assumed to be all lowercase.
181 181 A segment only needs encoding if a reserved name appears as a
182 182 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
183 183 doesn't need encoding.
184 184
185 185 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
186 186 >>> _auxencode(s.split('/'), True)
187 187 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
188 188 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
189 189 >>> _auxencode(s.split('/'), False)
190 190 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
191 191 >>> _auxencode(['foo. '], True)
192 192 ['foo.~20']
193 193 >>> _auxencode([' .foo'], True)
194 194 ['~20.foo']
195 195 '''
196 196 for i, n in enumerate(path):
197 197 if not n:
198 198 continue
199 199 if dotencode and n[0] in '. ':
200 200 n = "~%02x" % ord(n[0]) + n[1:]
201 201 path[i] = n
202 202 else:
203 203 l = n.find('.')
204 204 if l == -1:
205 205 l = len(n)
206 206 if ((l == 3 and n[:3] in _winres3) or
207 207 (l == 4 and n[3] <= '9' and n[3] >= '1'
208 208 and n[:3] in _winres4)):
209 209 # encode third letter ('aux' -> 'au~78')
210 210 ec = "~%02x" % ord(n[2])
211 211 n = n[0:2] + ec + n[3:]
212 212 path[i] = n
213 213 if n[-1] in '. ':
214 214 # encode last period or space ('foo...' -> 'foo..~2e')
215 215 path[i] = n[:-1] + "~%02x" % ord(n[-1])
216 216 return path
217 217
# longest encoded path accepted before falling back to the hashed encoding
_maxstorepathlen = 120
# number of characters kept from each directory level in the hashed encoding
_dirprefixlen = 8
# upper bound on the combined length of the shortened directory levels
# (separators included) in the hashed encoding
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
221 221
def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name for path.

    The result is 'dh/' + shortened directories + filler + sha1 hex digest
    of the full path + the extension of the basename. dotencode is passed
    on to _auxencode.
    '''
    digest = hashlib.sha1(path).hexdigest()
    # skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split('/')
    segments = _auxencode(lowered, dotencode)
    basename = segments[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    usedlen = 0
    for segment in segments[:-1]:
        short = segment[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        # length of the joined directories if this one were added
        newlen = len(short) if usedlen == 0 else usedlen + 1 + len(short)
        if newlen > _maxshortdirslen:
            break
        shortdirs.append(short)
        usedlen = newlen

    dirs = '/'.join(shortdirs)
    if len(dirs) > 0:
        dirs += '/'
    prefix = 'dh/' + dirs
    suffix = digest + ext
    res = prefix + suffix
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        # fill the remaining room with the beginning of the basename
        res = prefix + basename[:spaceleft] + suffix
    return res
252 252
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Every uppercase letter 'X' becomes '_x'. Reserved or illegal
    characters become '~xx', where xx is the two digit hex code of the
    character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    When the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is used instead.
    It keeps up to _dirprefixlen characters of each directory level of
    the lowerencoded path, but no more levels than fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    # too long for the reversible form: hash the dir-escaped path instead
    return _hashencode(path, dotencode)
290 290
def _pathencode(path):
    '''Encode path like _hybridencode(path, True).

    The only difference is a shortcut: when the unencoded path is already
    longer than _maxstorepathlen, the hashed form is returned without
    computing the default encoding first.
    '''
    de = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(de).split('/')
        res = '/'.join(_auxencode(segments, True))
        if len(res) <= _maxstorepathlen:
            return res
    return _hashencode(de, True)
300 300
# Rebind _pathencode to parsers.pathencode when the parsers module provides
# one; otherwise the pure-Python definition in this module stays in place.
_pathencode = getattr(parsers, 'pathencode', _pathencode)
302 302
def _plainhybridencode(f):
    '''Single-argument form of _hybridencode with dotencode turned off.'''
    return _hybridencode(f, dotencode=False)
305 305
306 306 def _calcmode(vfs):
307 307 try:
308 308 # files in .hg/ will be created using this mode
309 309 mode = vfs.stat().st_mode
310 310 # avoid some useless chmods
311 311 if (0o777 & ~util.umask) == (0o777 & mode):
312 312 mode = None
313 313 except OSError:
314 314 mode = None
315 315 return mode
316 316
# Space-separated names of store entries; the copylist() methods of
# basicstore and encodedstore split this string to build their lists.
_data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')
319 319
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        # rawvfs takes names as they are on disk; vfs applies encodedir first
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path for store file f'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in '.d' or '.i' are reported.
        Subdirectories are visited only when recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if self.rawvfs.isdir(root):
            pending = [root]
            readdir = self.rawvfs.readdir
            while pending:
                current = pending.pop()
                for name, kind, st in readdir(current, stat=True):
                    full = current + '/' + name
                    if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                        # name relative to the store root
                        encoded = util.pconvert(full[striplen:])
                        found.append((decodedir(encoded), encoded, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''entries found under data/ followed by those under meta/'''
        return self._walk('data', True) + self._walk('meta', True)

    def topfiles(self):
        '''entries directly in the store root, in reverse sorted order'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        for entry in self.topfiles():
            yield entry

    def copylist(self):
        '''names to copy: 'requires' plus every entry listed in _data'''
        return ['requires'] + _data.split()

    def write(self, tr):
        '''nothing to write for this store type'''
        pass

    def invalidatecaches(self):
        '''no caches are kept by this store type'''
        pass

    def markremoved(self, fn):
        '''removals are not tracked by this store type'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        dirpath = path if path.endswith("/") else path + "/"
        return self.vfs.exists(dirpath)
393 393
class encodedstore(basicstore):
    '''store kept in "<path>/store" with names passed through encodefilename'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (decoded name or None, encoded name, size)'''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                decoded = decodefilename(name)
            except KeyError:
                # the on-disk name is not a valid encoding
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the on-disk path for store file f'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''names to copy; the entries of _data live under store/'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
418 418
class fncache(object):
    '''in-memory view of the 'fncache' file: a set of store file names

    The file is read lazily, on the first add/remove/membership/iteration,
    and written back by write() only when the set was modified.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # None means "not loaded yet"
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file (IOError on open) yields an empty set. Raises
        error.Abort when the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # make sure the file is closed even when the validation below
        # aborts (or reading/decoding fails)
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # an empty entry was seen: rescan to report its line number
                fp.seek(0)
                for n, line in enumerate(util.iterfile(fp)):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            fp.close()

    def write(self, tr):
        '''write the entries back through transaction tr, if modified'''
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''record fn; marks the cache dirty only if fn is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''forget fn; unknown names are ignored'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
479 479
class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
    '''vfs wrapper that encodes paths and records written files in fncache'''
    def __init__(self, vfs, fnc, encode):
        scmutil.auditvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        # any mode other than plain reading registers data/ and meta/
        # paths (unencoded) with the fncache
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith(('data/', 'meta/')):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        # an empty/None path is handed to the wrapped vfs unencoded
        if not path:
            return self.vfs.join(path)
        return self.vfs.join(self.encode(path))
497 497
class fncachestore(basicstore):
    '''store in "<path>/store" whose data file names are listed in fncache'''
    def __init__(self, path, vfstype, dotencode):
        # dotencode selects the path encoder
        self.encode = _pathencode if dotencode else _plainhybridencode
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.fncache = fncache(rawvfs)
        self.vfs = _fncachevfs(rawvfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path for store file f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''size of the (already encoded) path, via stat on the raw vfs'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry on disk'''
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                size = self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                # listed in fncache but missing on disk: skip it
                continue
            yield name, encoded, size

    def copylist(self):
        '''names to copy; store entries are prefixed with store/'''
        names = ('data meta dh fncache phaseroots obsstore'
                 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in names.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self, tr):
        '''write the fncache through transaction tr'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''drop the loaded fncache entries so they are reloaded on next use'''
        self.fncache.entries = None

    def markremoved(self, fn):
        '''remove fn from the fncache'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''True if the encoded form of f can be stat'ed, False on ENOENT'''
        ef = self.encode(f)
        try:
            self.getsize(ef)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # check for files (exact match)
        candidate = path + '.i'
        if candidate in self.fncache and self._exists(candidate):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        return any(e.startswith(path) and self._exists(e)
                   for e in self.fncache)
571 571
def store(requirements, path, vfstype):
    '''Return the store object matching the repository requirements.

    Without 'store': basicstore. With 'store' only: encodedstore. With
    'store' and 'fncache': fncachestore, whose dotencode flag is set when
    'dotencode' is also among the requirements.
    '''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now