##// END OF EJS Templates
py3: make the string unicode so it's iterable in py3k
Mateusz Kwapich -
r30076:400dfded default
parent child Browse files
Show More
@@ -1,567 +1,567 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import errno
11 11 import hashlib
12 12 import os
13 13 import stat
14 14
15 15 from .i18n import _
16 16 from . import (
17 17 error,
18 18 parsers,
19 19 scmutil,
20 20 util,
21 21 )
22 22
23 23 # This avoids a collision between a file named foo and a dir named
24 24 # foo.i or foo.d
25 25 def _encodedir(path):
26 26 '''
27 27 >>> _encodedir('data/foo.i')
28 28 'data/foo.i'
29 29 >>> _encodedir('data/foo.i/bla.i')
30 30 'data/foo.i.hg/bla.i'
31 31 >>> _encodedir('data/foo.i.hg/bla.i')
32 32 'data/foo.i.hg.hg/bla.i'
33 33 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
34 34 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
35 35 '''
36 36 return (path
37 37 .replace(".hg/", ".hg.hg/")
38 38 .replace(".i/", ".i.hg/")
39 39 .replace(".d/", ".d.hg/"))
40 40
# Use the encodedir exposed by the parsers module when it has one;
# otherwise fall back to the pure-Python _encodedir.
try:
    encodedir = parsers.encodedir
except AttributeError:
    encodedir = _encodedir
42 42
def decodedir(path):
    '''Reverse the directory escaping: strip the inserted ".hg".

    Paths without any ".hg/" are returned untouched.  The rewrites run
    in the opposite order to the escaping, ".hg.hg/" last.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" not in path:
        # nothing was escaped, skip the three replace passes
        return path
    rewrites = (
        (".d.hg/", ".d/"),
        (".i.hg/", ".i/"),
        (".hg.hg/", ".hg/"),
    )
    for escaped, plain in rewrites:
        path = path.replace(escaped, plain)
    return path
58 58
59 59 def _reserved():
60 60 ''' characters that are problematic for filesystems
61 61
62 62 * ascii escapes (0..31)
63 63 * ascii hi (126..255)
64 64 * windows specials
65 65
66 66 these characters will be escaped by encodefunctions
67 67 '''
68 winreserved = [ord(x) for x in '\\:*?"<>|']
68 winreserved = [ord(x) for x in u'\\:*?"<>|']
69 69 for x in range(32):
70 70 yield x
71 71 for x in range(126, 256):
72 72 yield x
73 73 for x in winreserved:
74 74 yield x
75 75
def _buildencodefun():
    '''Build the reversible filename encoder and its matching decoder.

    Returns an (encode, decode) pair of functions.  encode maps each
    character through a table: characters listed by _reserved() become
    "~xx" (two-digit lowercase hex code), uppercase letters "X" become
    "_x", "_" becomes "__", everything else is kept.  decode inverts
    that table and raises KeyError when the input is not valid encoder
    output.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    # range()/items() instead of xrange()/iteritems(): identical results
    # on Python 2, and the names also exist on Python 3.
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    # applied after the reserved pass, so these entries win
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k
    def decode(s):
        i = 0
        while i < len(s):
            # an encoded token is 1 ("a"), 2 ("_a") or 3 ("~3a") chars long
            for l in range(1, 4):
                try:
                    yield dmap[s[i:i + l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # no token of any length matches at position i
                raise KeyError
    return (lambda s: ''.join([cmap[c] for c in s]),
            lambda s: ''.join(list(decode(s))))

_encodefname, _decodefname = _buildencodefun()
125 125
def encodefilename(s):
    '''Encode s for the store: directory escaping, then character encoding.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    escaped = encodedir(s)
    return _encodefname(escaped)
132 132
def decodefilename(s):
    '''Invert encodefilename: character decoding, then directory unescaping.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    unescaped = _decodefname(s)
    return decodedir(unescaped)
139 139
def _buildlowerencodefun():
    '''Build the lossy lower-casing filename encoder.

    The returned function maps each character through a table:
    characters listed by _reserved() become "~xx" (two-digit lowercase
    hex code), uppercase letters are lowercased, everything else is
    kept.  Unlike the encoder from _buildencodefun() the result cannot
    be decoded, since "A" and "a" both map to "a".

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    # range() instead of xrange(): identical result on Python 2, and the
    # name also exists on Python 3.
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    for x in _reserved():
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])
158 158
# Use the lowerencode exposed by the parsers module when it has a usable
# one; only build the pure-Python table otherwise.
lowerencode = getattr(parsers, 'lowerencode', None)
if not lowerencode:
    lowerencode = _buildlowerencodefun()
160 160
161 161 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
162 162 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
163 163 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
164 164 def _auxencode(path, dotencode):
165 165 '''
166 166 Encodes filenames containing names reserved by Windows or which end in
167 167 period or space. Does not touch other single reserved characters c.
168 168 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
169 169 Additionally encodes space or period at the beginning, if dotencode is
170 170 True. Parameter path is assumed to be all lowercase.
171 171 A segment only needs encoding if a reserved name appears as a
172 172 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
173 173 doesn't need encoding.
174 174
175 175 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
176 176 >>> _auxencode(s.split('/'), True)
177 177 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
178 178 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
179 179 >>> _auxencode(s.split('/'), False)
180 180 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
181 181 >>> _auxencode(['foo. '], True)
182 182 ['foo.~20']
183 183 >>> _auxencode([' .foo'], True)
184 184 ['~20.foo']
185 185 '''
186 186 for i, n in enumerate(path):
187 187 if not n:
188 188 continue
189 189 if dotencode and n[0] in '. ':
190 190 n = "~%02x" % ord(n[0]) + n[1:]
191 191 path[i] = n
192 192 else:
193 193 l = n.find('.')
194 194 if l == -1:
195 195 l = len(n)
196 196 if ((l == 3 and n[:3] in _winres3) or
197 197 (l == 4 and n[3] <= '9' and n[3] >= '1'
198 198 and n[:3] in _winres4)):
199 199 # encode third letter ('aux' -> 'au~78')
200 200 ec = "~%02x" % ord(n[2])
201 201 n = n[0:2] + ec + n[3:]
202 202 path[i] = n
203 203 if n[-1] in '. ':
204 204 # encode last period or space ('foo...' -> 'foo..~2e')
205 205 path[i] = n[:-1] + "~%02x" % ord(n[-1])
206 206 return path
207 207
# longest store path produced before switching to the hashed encoding
_maxstorepathlen = 120
# number of characters kept from each directory level in hashed paths
_dirprefixlen = 8
# upper bound on the joined shortened-directory part of a hashed path
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name ("dh/...") for path.

    The result is "dh/" + shortened directories + filler + sha1 hex
    digest of path + extension of the basename, where

    * the first five characters of path are dropped before encoding,
    * each directory level is lower-encoded, passed through _auxencode
      and cut to _dirprefixlen characters; levels are added only while
      their joined length stays within _maxshortdirslen,
    * the filler is as much of the start of the encoded basename as
      fits without exceeding _maxstorepathlen.
    '''
    digest = hashlib.sha1(path).hexdigest()
    # skips prefix 'data/' or 'meta/'
    lowered = lowerencode(path[5:]).split('/')
    parts = _auxencode(lowered, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    usedlen = 0
    for level in parts[:-1]:
        short = level[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        if usedlen == 0:
            newlen = len(short)
        else:
            # +1 for the separating slash
            newlen = usedlen + 1 + len(short)
        if newlen > _maxshortdirslen:
            break
        shortdirs.append(short)
        usedlen = newlen
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    prefix = 'dh/' + dirs
    res = prefix + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        res = prefix + basename[:spaceleft] + digest + ext
    return res
242 242
def _hybridencode(path, dotencode):
    '''Encode path for the store, keeping the result within a length limit.

    Default encoding (reversible):

    The path is directory-escaped with encodedir, then uppercase letters
    'X' are encoded as '_x' and reserved or illegal characters as '~xx',
    where xx is the two digit hex code of the character (see
    encodefilename).  Path components that are Windows reserved
    filenames are masked by encoding the third character
    ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, the
    result of _hashencode is returned instead.  That encoding uses up to
    _dirprefixlen characters of every directory level of the
    lowerencoded path, but not more levels than fit into
    _maxshortdirslen, followed by a filler and the sha digest of the
    full path.  The filler is the beginning of the basename of the
    lowerencoded path (everything after the last path separator), as
    long as possible without the encoded path exceeding
    _maxstorepathlen.  The extension (e.g. '.i' or '.d') is preserved,
    and the first five characters of the path (e.g. 'data/') are
    replaced with 'dh/'.
    '''
    escaped = encodedir(path)
    segments = _encodefname(escaped).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    return _hashencode(escaped, dotencode)
280 280
def _pathencode(path):
    '''Encode path like _hybridencode(path, True), with an early exit.

    When the raw path is already longer than _maxstorepathlen the
    default encoding is not attempted and the hashed encoding is
    returned right away.
    '''
    escaped = encodedir(path)
    # the first length test is on the raw path, not on the escaped one
    if len(path) <= _maxstorepathlen:
        segments = _encodefname(escaped).split('/')
        encoded = '/'.join(_auxencode(segments, True))
        if len(encoded) <= _maxstorepathlen:
            return encoded
    return _hashencode(escaped, True)
290 290
# Replace the pure-Python _pathencode with the pathencode exposed by the
# parsers module when it has one.
try:
    _pathencode = parsers.pathencode
except AttributeError:
    pass
292 292
def _plainhybridencode(f):
    '''Hybrid-encode f without encoding leading periods or spaces.'''
    return _hybridencode(f, dotencode=False)
295 295
def _calcmode(vfs):
    '''Return the mode to create store files with, or None.

    The mode is the st_mode of vfs.stat().  None is returned when the
    stat fails with OSError, or when the permission bits of that mode
    already equal what the current umask would produce.
    '''
    permbits = 0o777
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
        if (permbits & mode) == (permbits & ~util.umask):
            # avoid some useless chmods
            return None
        return mode
    except OSError:
        return None
306 306
307 307 _data = ('data meta 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
308 308 ' phaseroots obsstore')
309 309
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        '''Open the store at path through a vfs built by vfstype.'''
        rawvfs = vfstype(path)
        self.path = rawvfs.base
        createmode = _calcmode(rawvfs)
        self.createmode = createmode
        rawvfs.createmode = createmode
        self.rawvfs = rawvfs
        # names go through encodedir before they reach the raw vfs
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''Return the store path of f, with directory escaping applied.'''
        return '/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''Return a sorted list of (unencoded, encoded, size) tuples.

        Lists the regular files ending in ".d" or ".i" below relpath
        (the store root when relpath is empty).  Subdirectories are
        visited only when recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        if not self.rawvfs.isdir(root):
            return []
        striplen = len(self.path) + 1
        found = []
        pending = [root]
        readdir = self.rawvfs.readdir
        while pending:
            current = pending.pop()
            for name, kind, st in readdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    # path relative to the store root
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''Return the entries found under "data", then under "meta".'''
        entries = self._walk('data', True)
        entries.extend(self._walk('meta', True))
        return entries

    def topfiles(self):
        '''Return an iterator over the entries directly in the store root.'''
        # yield manifest before changelog
        toplevel = self._walk('', False)
        return reversed(toplevel)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for listfiles in (self.datafiles, self.topfiles):
            for entry in listfiles():
                yield entry

    def copylist(self):
        '''Return "requires" followed by the names listed in _data.'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self, tr):
        '''Do nothing; this store keeps no state to write.'''
        pass

    def invalidatecaches(self):
        '''Do nothing; this store keeps no caches.'''
        pass

    def markremoved(self, fn):
        '''Do nothing; this store does not track file names.'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "data/" + path
        # file?
        if self.vfs.exists(path + ".i"):
            return True
        # dir?
        if not path.endswith("/"):
            path += "/"
        return self.vfs.exists(path)
383 383
class encodedstore(basicstore):
    '''store kept under "<path>/store" with encodefilename-encoded names'''
    def __init__(self, path, vfstype):
        '''Open the store below path + "/store" through vfstype.'''
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        createmode = _calcmode(rawvfs)
        self.createmode = createmode
        rawvfs.createmode = createmode
        self.rawvfs = rawvfs
        # names go through encodefilename before they reach the raw vfs
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''Yield (unencoded, encoded, size) with fully decoded names.

        The unencoded name is None for entries whose name cannot be
        decoded.
        '''
        for name, encoded, size in super(encodedstore, self).datafiles():
            try:
                name = decodefilename(name)
            except KeyError:
                # not something the encoder could have produced
                name = None
            yield name, encoded, size

    def join(self, f):
        '''Return the store path of f, with the full name encoding applied.'''
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        '''Return "requires", "00changelog.i" and the "store/"-prefixed
        names listed in _data.'''
        names = ['requires', '00changelog.i']
        names.extend('store/' + f for f in _data.split())
        return names
408 408
class fncache(object):
    '''Set of file names backed by the "fncache" file of a vfs.

    The file is read lazily, the first time the set is queried or
    modified, and is rewritten by write() only if the set has changed.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        self.vfs = vfs
        # None means "not loaded yet"
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set.  Raises error.Abort when the
        file contains an empty line.  The file is closed on every path,
        including the abort.
        '''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the bad entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %d') % (n + 1)
                        raise error.Abort(t)
        finally:
            # previously skipped when the Abort above was raised
            fp.close()

    def write(self, tr):
        '''Rewrite the fncache file through transaction tr if needed.

        Does nothing unless entries were added or removed since the
        last load or write.
        '''
        if self._dirty:
            tr.addbackup('fncache')
            fp = self.vfs('fncache', mode='wb', atomictemp=True)
            # NOTE(review): close() is left out of a finally on purpose;
            # with atomictemp=True closing presumably publishes the temp
            # file, which should not happen after a failed write --
            # confirm against the vfs implementation.
            if self.entries:
                fp.write(encodedir('\n'.join(self.entries) + '\n'))
            fp.close()
            self._dirty = False

    def add(self, fn):
        '''Add fn to the set, marking it dirty if fn is new.'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def remove(self, fn):
        '''Remove fn from the set; a missing fn is silently ignored.'''
        if self.entries is None:
            self._load()
        try:
            self.entries.remove(fn)
            self._dirty = True
        except KeyError:
            pass

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
469 469
class _fncachevfs(scmutil.abstractvfs, scmutil.auditvfs):
    '''vfs wrapper that encodes names and records written files in an
    fncache'''
    def __init__(self, vfs, fnc, encode):
        '''Wrap vfs; fnc is the fncache to update, encode the name encoder.'''
        scmutil.auditvfs.__init__(self, vfs)
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        '''Open the encoded path through the wrapped vfs.

        Paths under "data/" or "meta/" opened in any mode other than
        'r' or 'rb' are first added, unencoded, to the fncache.
        '''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith(('data/', 'meta/')):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        '''Join through the wrapped vfs, encoding path unless it is empty.'''
        target = self.encode(path) if path else path
        return self.vfs.join(target)
487 487
class fncachestore(basicstore):
    '''store kept under "<path>/store" whose file list comes from an
    fncache'''
    def __init__(self, path, vfstype, dotencode):
        '''Open the store below path + "/store" through vfstype.

        dotencode selects the name encoder: _pathencode when true,
        _plainhybridencode otherwise.
        '''
        encode = _pathencode if dotencode else _plainhybridencode
        self.encode = encode
        rawvfs = vfstype(path + '/store')
        self.path = rawvfs.base
        self.pathsep = self.path + '/'
        createmode = _calcmode(rawvfs)
        self.createmode = createmode
        rawvfs.createmode = createmode
        self.rawvfs = rawvfs
        cache = fncache(rawvfs)
        self.fncache = cache
        self.vfs = _fncachevfs(rawvfs, cache, encode)
        self.opener = self.vfs

    def join(self, f):
        '''Return the store path of f, encoded with this store's encoder.'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''Return st_size of path as reported by the raw vfs.'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''Yield (unencoded, encoded, size) for the fncache entries, sorted.

        Entries whose file does not exist (ENOENT) are skipped; any
        other OSError is re-raised.
        '''
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                yield name, encoded, self.getsize(encoded)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise

    def copylist(self):
        '''Return "requires", "00changelog.i" and the "store/"-prefixed
        names of this store's files.'''
        storenames = ('data', 'meta', 'dh', 'fncache', 'phaseroots',
                      'obsstore', '00manifest.d', '00manifest.i',
                      '00changelog.d', '00changelog.i')
        names = ['requires', '00changelog.i']
        names.extend('store/' + f for f in storenames)
        return names

    def write(self, tr):
        '''Write the fncache through transaction tr.'''
        self.fncache.write(tr)

    def invalidatecaches(self):
        '''Drop the loaded fncache entries so they get re-read on next use.'''
        self.fncache.entries = None

    def markremoved(self, fn):
        '''Remove fn from the fncache.'''
        self.fncache.remove(fn)

    def _exists(self, f):
        '''Return True if the encoded file for f can be stat'ed.

        A missing file (ENOENT) gives False; any other OSError is
        re-raised.
        '''
        encoded = self.encode(f)
        try:
            self.getsize(encoded)
        except OSError as err:
            if err.errno != errno.ENOENT:
                raise
            # nonexistent entry
            return False
        return True

    def __contains__(self, path):
        '''Checks if the store contains path'''
        path = "data/" + path
        # check for files (exact match)
        revlog = path + '.i'
        if revlog in self.fncache and self._exists(revlog):
            return True
        # now check for directories (prefix match)
        if not path.endswith('/'):
            path += '/'
        return any(e.startswith(path) and self._exists(e)
                   for e in self.fncache)
561 561
def store(requirements, path, vfstype):
    '''Return the store object matching the repository requirements.

    Without 'store' a basicstore is used; with 'store' but without
    'fncache' an encodedstore; with both a fncachestore, whose dotencode
    flag is whether 'dotencode' is also required.
    '''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now