##// END OF EJS Templates
store: treat range as a generator instead of a list for py3 compat
timeless -
r29071:2f58975e default
parent child Browse files
Show More
@@ -1,553 +1,568 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import os
12 12 import stat
13 13
14 14 from .i18n import _
15 15 from . import (
16 16 error,
17 17 parsers,
18 18 scmutil,
19 19 util,
20 20 )
21 21
22 22 _sha = util.sha1
23 23
24 24 # This avoids a collision between a file named foo and a dir named
25 25 # foo.i or foo.d
26 26 def _encodedir(path):
27 27 '''
28 28 >>> _encodedir('data/foo.i')
29 29 'data/foo.i'
30 30 >>> _encodedir('data/foo.i/bla.i')
31 31 'data/foo.i.hg/bla.i'
32 32 >>> _encodedir('data/foo.i.hg/bla.i')
33 33 'data/foo.i.hg.hg/bla.i'
34 34 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
35 35 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
36 36 '''
37 37 return (path
38 38 .replace(".hg/", ".hg.hg/")
39 39 .replace(".i/", ".i.hg/")
40 40 .replace(".d/", ".d.hg/"))
41 41
42 42 encodedir = getattr(parsers, 'encodedir', _encodedir)
43 43
def decodedir(path):
    '''Undo the '.hg' escaping of directory names ending in .hg, .i or .d.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # '.hg.hg/' is unescaped last so that it is not mistaken for
        # an escaped '.i/' or '.d/'
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
59 59
60 def _reserved():
61 ''' characters that are problematic for filesystems
62
63 * ascii escapes (0..31)
64 * ascii hi (126..255)
65 * windows specials
66
67 these characters will be escaped by encodefunctions
68 '''
69 winreserved = [ord(x) for x in '\\:*?"<>|']
70 for x in range(32):
71 yield x
72 for x in range(126, 256):
73 yield x
74 for x in winreserved:
75 yield x
76
def _buildencodefun():
    '''Build the reversible (encode, decode) pair of filename functions.

    Reserved characters (see _reserved) are encoded as '~xx', uppercase
    letters and '_' as '_' followed by the lowercased character; every
    other 7-bit character maps to itself.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    # range() and items() are used instead of xrange()/iteritems() so
    # that this runs unchanged on both Python 2 and Python 3
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    # reverse table: encoded token (1 to 3 characters long) -> character
    dmap = dict((v, k) for k, v in cmap.items())
    def decode(s):
        i = 0
        while i < len(s):
            # try the tokens starting at i, shortest first
            for l in range(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no token of length 1..3 matches at position i
                raise KeyError
    return (lambda s: ''.join([cmap[c] for c in s]),
            lambda s: ''.join(list(decode(s))))
108 124
109 125 _encodefname, _decodefname = _buildencodefun()
110 126
def encodefilename(s):
    '''Apply the directory escaping, then the character encoding.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
117 133
def decodefilename(s):
    '''Reverse encodefilename: decode characters, then directory escapes.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
124 140
def _buildlowerencodefun():
    '''Build the one-way lowercasing filename encoder.

    Reserved characters (see _reserved) are encoded as '~xx' and
    uppercase letters are simply lowercased, so the result cannot be
    decoded back.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    # range() is used instead of xrange() so that this runs unchanged
    # on both Python 2 and Python 3
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])
144 159
145 160 lowerencode = getattr(parsers, 'lowerencode', None) or _buildlowerencodefun()
146 161
147 162 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
148 163 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
149 164 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
150 165 def _auxencode(path, dotencode):
151 166 '''
152 167 Encodes filenames containing names reserved by Windows or which end in
153 168 period or space. Does not touch other single reserved characters c.
154 169 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
155 170 Additionally encodes space or period at the beginning, if dotencode is
156 171 True. Parameter path is assumed to be all lowercase.
157 172 A segment only needs encoding if a reserved name appears as a
158 173 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
159 174 doesn't need encoding.
160 175
161 176 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
162 177 >>> _auxencode(s.split('/'), True)
163 178 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
164 179 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
165 180 >>> _auxencode(s.split('/'), False)
166 181 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
167 182 >>> _auxencode(['foo. '], True)
168 183 ['foo.~20']
169 184 >>> _auxencode([' .foo'], True)
170 185 ['~20.foo']
171 186 '''
172 187 for i, n in enumerate(path):
173 188 if not n:
174 189 continue
175 190 if dotencode and n[0] in '. ':
176 191 n = "~%02x" % ord(n[0]) + n[1:]
177 192 path[i] = n
178 193 else:
179 194 l = n.find('.')
180 195 if l == -1:
181 196 l = len(n)
182 197 if ((l == 3 and n[:3] in _winres3) or
183 198 (l == 4 and n[3] <= '9' and n[3] >= '1'
184 199 and n[:3] in _winres4)):
185 200 # encode third letter ('aux' -> 'au~78')
186 201 ec = "~%02x" % ord(n[2])
187 202 n = n[0:2] + ec + n[3:]
188 203 path[i] = n
189 204 if n[-1] in '. ':
190 205 # encode last period or space ('foo...' -> 'foo..~2e')
191 206 path[i] = n[:-1] + "~%02x" % ord(n[-1])
192 207 return path
193 208
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name ('dh/...') for path.

    The result consists of shortened directory prefixes, as much of the
    basename as fits into _maxstorepathlen, the sha digest of the full
    path and the original extension.
    '''
    digest = _sha(path).hexdigest()
    # [5:] skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split('/')
    parts = _auxencode(lowered, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    shortlen = 0
    for part in parts[:-1]:
        prefix = part[:_dirprefixlen]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        # account for the '/' separator on every level but the first
        total = len(prefix) if shortlen == 0 else shortlen + 1 + len(prefix)
        if total > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        shortlen = total
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    head = 'dh/' + dirs
    tail = digest + ext
    spaceleft = _maxstorepathlen - len(head + tail)
    if spaceleft > 0:
        # pad with the start of the basename up to the length limit
        return head + basename[:spaceleft] + tail
    return head + tail
228 243
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Every uppercase letter 'X' becomes '_x'. Every reserved or illegal
    character becomes '~xx', xx being the two digit hex code of the
    character (see encodefilename).
    Path components consisting of Windows reserved filenames are masked
    by encoding their third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    Used instead whenever the default-encoded path would be longer than
    _maxstorepathlen.
    It keeps up to _dirprefixlen characters of each directory level of
    the lowerencoded path, for as many levels as fit into
    _maxshortdirslen.
    After the directories comes a filler, then the sha digest of the
    full path. The filler is the start of the basename of the
    lowerencoded path (the basename being everything after the last path
    separator), taken as long as possible: until the encoded path
    reaches _maxstorepathlen characters or the basename is used up.
    The extension (e.g. '.i' or '.d') is preserved.

    With the hashed encoding, the leading 'data/' is replaced by 'dh/'.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    return _hashencode(path, dotencode)
266 281
def _pathencode(path):
    '''Encode path with dotencode enabled, hashing it when too long.

    The hashed form is used when either the raw path or its default
    encoding exceeds _maxstorepathlen.
    '''
    direncoded = encodedir(path)
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(direncoded).split('/')
        encoded = '/'.join(_auxencode(segments, True))
        if len(encoded) <= _maxstorepathlen:
            return encoded
    return _hashencode(direncoded, True)
276 291
277 292 _pathencode = getattr(parsers, 'pathencode', _pathencode)
278 293
def _plainhybridencode(f):
    '''Hybrid-encode f with dotencode switched off.'''
    return _hybridencode(f, dotencode=False)
281 296
282 297 def _calcmode(vfs):
283 298 try:
284 299 # files in .hg/ will be created using this mode
285 300 mode = vfs.stat().st_mode
286 301 # avoid some useless chmods
287 302 if (0o777 & ~util.umask) == (0o777 & mode):
288 303 mode = None
289 304 except OSError:
290 305 mode = None
291 306 return mode
292 307
_data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        # file names go through encodedir before reaching the raw vfs
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size)'''
        root = self.path
        if relpath:
            root += '/' + relpath
        if not self.rawvfs.isdir(root):
            return []
        # number of leading characters that make up the store path
        striplen = len(self.path) + 1
        readdir = self.rawvfs.readdir
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below data/ followed by meta/'''
        entries = self._walk('data', True)
        entries = entries + self._walk('meta', True)
        return entries

    def topfiles(self):
        '''return the top-level entries in reverse sorted order'''
        # yield manifest before changelog
        return reversed(self._walk('', False))

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for lister in (self.datafiles, self.topfiles):
            for entry in lister():
                yield entry

    def copylist(self):
        '''return the list of names made of 'requires' plus _data'''
        return ['requires'] + _data.split()

    def write(self, tr):
        '''no-op in the basic store'''
        pass

    def invalidatecaches(self):
        '''no-op in the basic store'''
        pass

    def markremoved(self, fn):
        '''no-op in the basic store'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        candidate = 'data/' + path
        # file?
        if self.vfs.exists(candidate + ".i"):
            return True
        # dir?
        if not candidate.endswith("/"):
            candidate += "/"
        return self.vfs.exists(candidate)
369 384
class encodedstore(basicstore):
    '''store kept in a store/ subdirectory, using encodefilename names'''
    def __init__(self, path, vfstype):
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        unencoded is None when the on-disk name cannot be decoded
        '''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return 'requires', '00changelog.i' and the store/ entries'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
394 409
class fncache(object):
    '''lazily loaded set of the file names stored in the 'fncache' file'''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # None until the file has been read by _load()
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        Raises error.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # reread the file to report the offending line number
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # close the file even when reading fails or we abort
            fp.close()

    def write(self, tr):
        '''write the entries back through transaction tr, if modified'''
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''add fn to the entries, loading them first if needed'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''remove fn from the entries; unknown names are ignored'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
455 470
class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
    '''vfs wrapper that encodes names and records written store files'''
    def __init__(self, vfs, fnc, encode):
        scmutil.auditvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        '''open path via the wrapped vfs under its encoded name'''
        readonly = mode in ('r', 'rb')
        # files opened for anything but reading below data/ or meta/
        # are recorded in the fncache
        if not readonly and path.startswith(('data/', 'meta/')):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        '''join the encoded path; an empty path is passed through as is'''
        target = self.encode(path) if path else path
        return self.vfs.join(target)
473 488
class fncachestore(basicstore):
    '''store in store/ that tracks its data files in an fncache'''
    def __init__(self, path, vfstype, dotencode):
        self.encode = _pathencode if dotencode else _plainhybridencode
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(rawvfs)
        rawvfs.createmode = self.createmode
        self.rawvfs = rawvfs
        self.fncache = fncache(rawvfs)
        self.vfs = _fncachevfs(rawvfs, self.fncache, self.encode)
        self.opener = self.vfs

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the already encoded path'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries

        entries whose file does not exist are skipped
        '''
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                yield name, encoded, self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    def copylist(self):
        '''return 'requires', '00changelog.i' and the store/ entries'''
        names = ('data meta dh fncache phaseroots obsstore'
                 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in names.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self, tr):
        '''write the fncache through transaction tr'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''drop the loaded fncache entries so they get reloaded'''
        self.fncache.entries = None

    def markremoved(self, fn):
        '''remove fn from the fncache'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''tell whether the encoded form of f can be stat'ed'''
        encoded = self.encode(f)
        try:
            self.getsize(encoded)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        else:
            return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "/".join(("data", path))
        # check for files (exact match)
        index = path + '.i'
        if index in self.fncache and self._exists(index):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        return any(entry.startswith(path) and self._exists(entry)
                   for entry in self.fncache)
547 562
def store(requirements, path, vfstype):
    '''Return the store object matching the repository requirements.'''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now