##// END OF EJS Templates
store: add a contains method to basicstore...
smuralid -
r17744:09d5b205 default
parent child Browse files
Show More
@@ -1,508 +1,519 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util, parsers
10 10 import os, stat, errno
11 11
# hash constructor used for the digest part of hashed store paths
# (see _hashencode)
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def _encodedir(path):
17 17 '''
18 18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
# use the 'encodedir' provided by the parsers module when it has one,
# otherwise fall back to the pure Python _encodedir from this file
encodedir = getattr(parsers, 'encodedir', _encodedir)
33 33
def decodedir(path):
    '''Undo the directory escaping: drop one ".hg" before each slash.

    ".d.hg/", ".i.hg/" and ".hg.hg/" are turned back into ".d/", ".i/"
    and ".hg/". Paths without any ".hg/" are returned unchanged without
    running the replacements.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" not in path:
        # fast path: nothing was escaped
        return path
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))
49 49
50 50 def _buildencodefun():
51 51 '''
52 52 >>> enc, dec = _buildencodefun()
53 53
54 54 >>> enc('nothing/special.txt')
55 55 'nothing/special.txt'
56 56 >>> dec('nothing/special.txt')
57 57 'nothing/special.txt'
58 58
59 59 >>> enc('HELLO')
60 60 '_h_e_l_l_o'
61 61 >>> dec('_h_e_l_l_o')
62 62 'HELLO'
63 63
64 64 >>> enc('hello:world?')
65 65 'hello~3aworld~3f'
66 66 >>> dec('hello~3aworld~3f')
67 67 'hello:world?'
68 68
69 69 >>> enc('the\x07quick\xADshot')
70 70 'the~07quick~adshot'
71 71 >>> dec('the~07quick~adshot')
72 72 'the\\x07quick\\xadshot'
73 73 '''
74 74 e = '_'
75 75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 77 for x in (range(32) + range(126, 256) + winreserved):
78 78 cmap[chr(x)] = "~%02x" % x
79 79 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
80 80 cmap[chr(x)] = e + chr(x).lower()
81 81 dmap = {}
82 82 for k, v in cmap.iteritems():
83 83 dmap[v] = k
84 84 def decode(s):
85 85 i = 0
86 86 while i < len(s):
87 87 for l in xrange(1, 4):
88 88 try:
89 89 yield dmap[s[i:i + l]]
90 90 i += l
91 91 break
92 92 except KeyError:
93 93 pass
94 94 else:
95 95 raise KeyError
96 96 return (lambda s: ''.join([cmap[c] for c in s]),
97 97 lambda s: ''.join(list(decode(s))))
98 98
99 99 _encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''Apply the directory escaping, then the character encoding, to s.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    return _encodefname(encodedir(s))
107 107
def decodefilename(s):
    '''Reverse encodefilename: decode characters, then directories.

    A KeyError from the character decoder is not caught here.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    return decodedir(_decodefname(s))
114 114
115 115 def _buildlowerencodefun():
116 116 '''
117 117 >>> f = _buildlowerencodefun()
118 118 >>> f('nothing/special.txt')
119 119 'nothing/special.txt'
120 120 >>> f('HELLO')
121 121 'hello'
122 122 >>> f('hello:world?')
123 123 'hello~3aworld~3f'
124 124 >>> f('the\x07quick\xADshot')
125 125 'the~07quick~adshot'
126 126 '''
127 127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 129 for x in (range(32) + range(126, 256) + winreserved):
130 130 cmap[chr(x)] = "~%02x" % x
131 131 for x in range(ord("A"), ord("Z")+1):
132 132 cmap[chr(x)] = chr(x).lower()
133 133 return lambda s: "".join([cmap[c] for c in s])
134 134
135 135 lowerencode = _buildlowerencodefun()
136 136
137 137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 140 def _auxencode(path, dotencode):
141 141 '''
142 142 Encodes filenames containing names reserved by Windows or which end in
143 143 period or space. Does not touch other single reserved characters c.
144 144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 145 Additionally encodes space or period at the beginning, if dotencode is
146 146 True. Parameter path is assumed to be all lowercase.
147 147 A segment only needs encoding if a reserved name appears as a
148 148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 149 doesn't need encoding.
150 150
151 151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 152 >>> _auxencode(s.split('/'), True)
153 153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 155 >>> _auxencode(s.split('/'), False)
156 156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 157 >>> _auxencode(['foo. '], True)
158 158 ['foo.~20']
159 159 >>> _auxencode([' .foo'], True)
160 160 ['~20.foo']
161 161 '''
162 162 for i, n in enumerate(path):
163 163 if not n:
164 164 continue
165 165 if dotencode and n[0] in '. ':
166 166 n = "~%02x" % ord(n[0]) + n[1:]
167 167 path[i] = n
168 168 else:
169 169 l = n.find('.')
170 170 if l == -1:
171 171 l = len(n)
172 172 if ((l == 3 and n[:3] in _winres3) or
173 173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 174 and n[:3] in _winres4)):
175 175 # encode third letter ('aux' -> 'au~78')
176 176 ec = "~%02x" % ord(n[2])
177 177 n = n[0:2] + ec + n[3:]
178 178 path[i] = n
179 179 if n[-1] in '. ':
180 180 # encode last period or space ('foo...' -> 'foo..~2e')
181 181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 182 return path
183 183
# longest store path produced by the reversible encoding; longer results
# are replaced by the hashed encoding
_maxstorepathlen = 120
# number of leading characters kept from each directory name
_dirprefixlen = 8
# upper bound on the total length of the shortened directory part
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''Return the hashed (non-reversible) store path for path.

    The first component of path is dropped and the result starts with
    'dh/' instead. It is followed by shortened directory names, a filler
    taken from the start of the basename, the hex digest of the full
    path and the extension of the basename.

    dotencode is handed to _auxencode unchanged.
    '''
    digest = _sha(path).hexdigest()
    # lowerencoded segments without the first path component
    le = lowerencode(path).split('/')[1:]
    parts = _auxencode(le, dotencode)
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in '. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + '_'
        if sdirslen == 0:
            t = len(d)
        else:
            # + 1 accounts for the '/' joining the directories
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = '/'.join(sdirs)
    if len(dirs) > 0:
        dirs += '/'
    res = 'dh/' + dirs + digest + ext
    # use whatever room is left for the beginning of the basename
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = 'dh/' + dirs + filler + digest + ext
    return res
218 218
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    ef = _encodefname(path).split('/')
    res = '/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        # too long for the reversible form, fall back to hashing
        res = _hashencode(path, dotencode)
    return res
256 256
def _pathencode(path):
    '''Return the reversible dot-encoded store path, or None if too long.

    None is returned when either path itself or its encoded form is
    longer than _maxstorepathlen; the caller then has to use the hashed
    encoding instead.
    '''
    if len(path) > _maxstorepathlen:
        # checked before encoding: no result can fit, skip the work
        return None
    ef = _encodefname(encodedir(path)).split('/')
    res = '/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return None
    return res

# use the 'pathencode' provided by the parsers module when it has one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
267 267
def _dothybridencode(f):
    '''Encode f with dotencode on, hashing it when the result is too long.

    The reversible encoding from _pathencode is used whenever it yields
    a result; on None the hashed encoding of the directory-escaped path
    is returned instead.
    '''
    ef = _pathencode(f)
    if ef is None:
        return _hashencode(encodedir(f), True)
    return ef
273 273
def _plainhybridencode(f):
    '''Encode f with _hybridencode, dotencode switched off.'''
    return _hybridencode(f, False)
276 276
def _calcmode(vfs):
    '''Return the mode to create files with, or None for the default.

    The mode is taken from vfs.stat(). None is returned when the stat
    call fails with OSError, or when the permission bits found are the
    ones the current umask would produce anyway.
    '''
    # all nine rwx permission bits (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    try:
        # files in .hg/ will be created using this mode
        mode = vfs.stat().st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
287 287
# whitespace separated names of the store entries listed by copylist()
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        '''Open the store located at path through a vfs made by vfstype.'''
        vfs = vfstype(path)
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs takes store names as they are, vfs applies encodedir first
        self.rawvfs = vfs
        self.vfs = scmutil.filtervfs(vfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''Return the path of the (unencoded) store file f.'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are visited when recurse is true. The
        list is empty if the directory does not exist.
        '''
        path = self.path
        if relpath:
            path += '/' + relpath
        # length of the store path prefix (including the '/') to cut off
        striplen = len(self.path) + 1
        l = []
        if self.rawvfs.isdir(path):
            visit = [path]
            while visit:
                p = visit.pop()
                for f, kind, st in osutil.listdir(p, stat=True):
                    fp = p + '/' + f
                    if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        l.sort()
        return l

    def datafiles(self):
        '''Return the (unencoded, encoded, size) entries below 'data'.'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        # yield manifest before changelog
        for x in reversed(self._walk('', False)):
            yield x

    def copylist(self):
        '''Return the list of names built from 'requires' and _data.'''
        return ['requires'] + _data.split()

    def write(self):
        '''Nothing to write for this store type.'''
        pass

    def __contains__(self, path):
        '''Checks if the store contains path

        path is looked up below 'data'. It is found if either the file
        path + ".i" or the directory path exists at the location given
        by self.join().
        '''
        path = "/".join(("data", path))
        # file?
        if os.path.exists(self.join(path + ".i")):
            return True
        # dir?
        if not path.endswith("/"):
            path = path + "/"
        return os.path.exists(self.join(path))
354
class encodedstore(basicstore):
    '''store kept in the 'store' subdirectory, using encodefilename'''
    def __init__(self, path, vfstype):
        '''Open the store below path + '/store' via a vfs from vfstype.'''
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs takes encoded names, vfs applies encodefilename first
        self.rawvfs = vfs
        self.vfs = scmutil.filtervfs(vfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        The unencoded name is None for files whose name cannot be
        decoded.
        '''
        for a, b, size in self._walk('data', True):
            try:
                a = decodefilename(a)
            except KeyError:
                # name on disk is not a valid encoding
                a = None
            yield a, b, size

    def join(self, f):
        '''Return the path of the (unencoded) store file f.'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''Return 'requires', '00changelog.i' and the 'store/' entries.'''
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in _data.split()])
368 379
class fncache(object):
    '''set of file names kept in the 'fncache' file, loaded on demand'''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, vfs):
        '''vfs is called to open the 'fncache' file for reading/writing.'''
        self.vfs = vfs
        # None until the file was read (or rewrite() was called)
        self.entries = None
        # True when entries holds names not yet written to the file
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.vfs('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # make sure the file gets closed even if an Abort is raised below
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # read the file again to find the number of the empty line
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''Write files, one name per line, to the fncache file.'''
        # NOTE(review): fp is not closed when write() fails; presumably
        # intended so that an atomictemp file is not committed - confirm
        fp = self.vfs('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''Replace the file content and the entries with files.'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''Write the entries to the file if some were added.'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''Add fn to the entries, marking them dirty if fn is new.'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
426 437
class _fncachevfs(scmutil.abstractvfs):
    '''vfs wrapper which encodes names and records them in the fncache'''
    def __init__(self, vfs, fnc, encode):
        '''vfs is the wrapped vfs, fnc the fncache, encode the name encoder.'''
        self.vfs = vfs
        self.fncache = fnc
        self.encode = encode

    def _getmustaudit(self):
        return self.vfs.mustaudit

    def _setmustaudit(self, onoff):
        self.vfs.mustaudit = onoff

    # mustaudit is read from and stored on the wrapped vfs
    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        '''Open the file at the encoded path through the wrapped vfs.

        Paths starting with 'data/' are added to the fncache unless mode
        is 'r' or 'rb'.
        '''
        if mode not in ('r', 'rb') and path.startswith('data/'):
            self.fncache.add(path)
        return self.vfs(self.encode(path), mode, *args, **kw)

    def join(self, path):
        '''Join the encoded path; an empty path is passed on unencoded.'''
        if path:
            return self.vfs.join(self.encode(path))
        else:
            return self.vfs.join(path)
451 462
class fncachestore(basicstore):
    '''store in 'store' using the hybrid encoding and an fncache file'''
    def __init__(self, path, vfstype, dotencode):
        '''Open the store below path + '/store' via a vfs from vfstype.

        dotencode selects _dothybridencode instead of
        _plainhybridencode as the name encoder.
        '''
        if dotencode:
            encode = _dothybridencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        vfs = vfstype(path + '/store')
        self.path = vfs.base
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(vfs)
        vfs.createmode = self.createmode
        # rawvfs takes encoded names, vfs encodes and updates the fncache
        self.rawvfs = vfs
        fnc = fncache(vfs)
        self.fncache = fnc
        self.vfs = _fncachevfs(vfs, fnc, encode)
        self.opener = self.vfs

    def join(self, f):
        '''Return the path of the (unencoded) store file f.'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''Return the size of the file at the (encoded) path.'''
        return self.rawvfs.stat(path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries

        Entries whose file does not exist are skipped. Once all entries
        were yielded, the fncache is rewritten without the skipped ones.
        Any OSError other than ENOENT is passed on to the caller.
        '''
        # sys.exc_info() is used to get at the exception without the
        # version specific "except ... , err" / "except ... as err" syntax
        import sys
        rewrite = False
        existing = []
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                size = self.getsize(ef)
            except OSError:
                err = sys.exc_info()[1]
                if err.errno != errno.ENOENT:
                    raise
                # nonexistent entry
                rewrite = True
            else:
                yield f, ef, size
                existing.append(f)
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''Return 'requires', '00changelog.i' and the 'store/' entries.'''
        d = ('data dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self):
        '''Write the fncache (a no-op unless entries were added).'''
        self.fncache.write()
502 513
def store(requirements, path, vfstype):
    '''Return the store object matching the given requirements.

    With 'store' and 'fncache' required a fncachestore is made (with
    dotencode if 'dotencode' is required too), with only 'store' an
    encodedstore, and a basicstore otherwise. path and vfstype are
    passed on to the store class.
    '''
    if 'store' in requirements:
        if 'fncache' in requirements:
            return fncachestore(path, vfstype, 'dotencode' in requirements)
        return encodedstore(path, vfstype)
    return basicstore(path, vfstype)
General Comments 0
You need to be logged in to leave comments. Login now