##// END OF EJS Templates
store: abstract out how we retrieve a file's size
Bryan O'Sullivan -
r17154:d592759a default
parent child Browse files
Show More
@@ -1,428 +1,429
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''
    Escape directory names below 'data/' that end in '.hg', '.i' or '.d'
    by appending '.hg' to them. Paths that do not start with 'data/' are
    returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if path.startswith('data/'):
        # '.hg/' has to be handled first: the '.hg/' produced while escaping
        # '.i/' and '.d/' must not be escaped a second time
        for suffix in ('.hg/', '.i/', '.d/'):
            path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
31 31
def decodedir(path):
    '''
    Undo encodedir(): strip the '.hg' that was appended to directory names
    ending in '.d', '.i' or '.hg'. Paths outside 'data/' and paths without
    any '.hg/' are returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if path.startswith('data/') and ".hg/" in path:
        # reverse order of encodedir(): '.hg.hg/' is collapsed last
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
47 47
def _buildencodefun():
    '''
    Build and return the reversible (encode, decode) filename functions.

    encode applies encodedir() and then maps each character through a
    table: 'A'-'Z' become '_' plus the lowercase letter, '_' becomes '__',
    and the characters 0x00-0x1f, 0x7e-0xff and the Windows reserved
    characters become '~xx' (two hex digits). decode reverses the table and
    then applies decodedir(); it raises KeyError if the input is not a
    valid encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    winreserved = [ord(x) for x in '\\:*?"<>|']
    # range()/list() instead of xrange() and list concatenation of range
    # objects, so the table builds the same way on Python 2 and Python 3
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in list(range(32)) + list(range(126, 256)) + winreserved:
        cmap[chr(x)] = "~%02x" % x
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    # every encoded token is unique, so the table can simply be inverted
    dmap = dict((v, k) for k, v in cmap.items())

    def tokens(s):
        # encoded tokens are 1 ('a'), 2 ('_a') or 3 ('~7e') characters long;
        # try the shortest candidate first
        pos = 0
        while pos < len(s):
            for width in (1, 2, 3):
                candidate = s[pos:pos + width]
                if candidate in dmap:
                    yield dmap[candidate]
                    pos += width
                    break
            else:
                # nothing at this position is a known token
                raise KeyError

    def encode(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decode(s):
        return decodedir("".join(list(tokens(s))))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
98 98
def _buildlowerencodefun():
    '''
    Build and return the non-reversible lowercasing encoder.

    The returned function maps 'A'-'Z' to lowercase and encodes the
    characters 0x00-0x1f, 0x7e-0xff and the Windows reserved characters as
    '~xx' (two hex digits); everything else is passed through.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    winreserved = [ord(x) for x in '\\:*?"<>|']
    # range()/list() instead of xrange() and list concatenation of range
    # objects, so the table builds the same way on Python 2 and Python 3
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in list(range(32)) + list(range(126, 256)) + winreserved:
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()

    def encode(s):
        return "".join([cmap[c] for c in s])

    return encode

lowerencode = _buildlowerencodefun()
120 120
_winreservednames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def _auxencode(path, dotencode):
    '''
    Mask path components that Windows cannot handle.

    A component whose part before the first '.' is a Windows reserved name
    gets its third character encoded as '~xx'; a trailing period or space
    is encoded as '~xx' as well. If dotencode is True, a leading period or
    space is encoded too. Other single reserved characters c are left
    alone: c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
    path is assumed to be all lowercase.

    >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
    '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
    >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/foo.', False)
    '.com1com2/lp~749.lpt4.lpt1/conprn/foo~2e'
    >>> _auxencode('foo. ', True)
    'foo.~20'
    >>> _auxencode(' .foo', True)
    '~20.foo'
    '''
    encoded = []
    for part in path.split('/'):
        if part:
            stem = part.split('.')[0]
            if stem and stem in _winreservednames:
                # mask the third letter ('aux' -> 'au~78')
                part = "%s~%02x%s" % (part[:2], ord(part[2]), part[3:])
            last = part[-1]
            if last in '. ':
                # mask only the final period or space ('foo...' -> 'foo..~2e')
                part = part[:-1] + "~%02x" % ord(last)
            first = part[0]
            if dotencode and first in '. ':
                part = "~%02x" % ord(first) + part[1:]
        # empty components are kept as they are
        encoded.append(part)
    return '/'.join(encoded)
157 157
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, auxencode):
    '''encodes path with a length limit

    Only paths that begin with 'data/' are encoded; anything else is
    returned unchanged.

    Default encoding (reversible):

    Uppercase letters 'X' are encoded as '_x'. Reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    Used instead when the default-encoded path is longer than
    _maxstorepathlen. The result is made of:

    - 'dh/' in place of the leading 'data/',
    - up to _dirprefixlen characters of each directory level of the
      lowerencoded path, for as many levels as fit into _maxshortdirslen,
    - the filler: the beginning of the basename of the lowerencoded path
      (everything after the last path separator), as long as possible
      without the result exceeding _maxstorepathlen characters,
    - the sha digest of the full path,
    - the extension (e.g. '.i' or '.d'), which is preserved.
    '''
    if not path.startswith('data/'):
        return path
    # escape directories ending with .i and .d
    path = encodedir(path)
    ndpath = path[len('data/'):]
    res = 'data/' + auxencode(encodefilename(ndpath))
    if len(res) <= _maxstorepathlen:
        return res

    # too long for the reversible form: fall back to the hashed layout
    digest = _sha(path).hexdigest()
    aep = auxencode(lowerencode(ndpath))
    ext = os.path.splitext(aep)[1]
    components = aep.split('/')
    basename = components[-1]
    shortdirs = []
    for component in components[:-1]:
        prefix = component[:_dirprefixlen]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        # the budget check deliberately counts a separator in front of
        # every level, including the first one
        candidate = '/'.join(shortdirs) + '/' + prefix
        if len(candidate) > _maxshortdirslen:
            break
        shortdirs.append(prefix)
    dirs = ''.join([d + '/' for d in shortdirs])
    # room left for the filler once dirs, digest and extension are placed
    spaceleft = _maxstorepathlen - len('dh/' + dirs + digest + ext)
    filler = basename[:max(spaceleft, 0)]
    return 'dh/' + dirs + filler + digest + ext
223 223
def _calcmode(path):
    '''
    Return the permission mode to create files with, taken from path.

    Returns the st_mode of path, or None when path cannot be stat'ed or
    when its permission bits already equal what the current umask would
    produce (so callers can skip a useless chmod).
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all nine rwx permission bits; spelled with stat constants because the
    # legacy octal literal 0777 is a syntax error on Python 3
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
234 234
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i phaseroots'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # every name opened through the store goes through encodedir first
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the path of f inside the store'''
        return '%s/%s' % (self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) for the
        regular '.d' and '.i' files found below relpath; subdirectories
        are only visited if recurse is true'''
        root = self.path
        if relpath:
            root += '/' + relpath
        if not os.path.isdir(root):
            return []
        # names are reported relative to the store root
        striplen = len(self.path) + 1
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in self._walk('', False)[::-1]:
            yield entry

    def copylist(self):
        '''return the names to copy: 'requires' plus the entries of _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
287 287
class encodedstore(basicstore):
    '''store located in <path>/store whose file names go through
    encodefilename'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size); unencoded is None for a file
        whose name cannot be decoded'''
        for name, stored, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                # not a valid encoded name
                name = None
            yield name, stored, size

    def join(self, f):
        '''return the path of f inside the store'''
        return '%s/%s' % (self.path, encodefilename(f))

    def copylist(self):
        '''return the names to copy, relative to the directory above the
        store'''
        names = ['requires', '00changelog.i']
        names.extend(['store/' + f for f in _data.split()])
        return names
310 310
class fncache(object):
    '''set of file names backed by the 'fncache' file of a store

    Entries are loaded lazily on first use and written back by write()
    only if something was added.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None means "not loaded yet"
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set. Raises util.Abort if the file
        contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # re-read line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file on the abort and read-error paths as well
            fp.close()

    def _write(self, files, atomictemp):
        '''write files (one encodedir'ed name per line) to the fncache file'''
        # NOTE(review): close() is intentionally not in a finally block;
        # presumably closing an atomictemp file is what puts it in place, so
        # a failed write must not reach it -- confirm against the opener
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file contents and the in-memory entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if any were added since loading'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
368 368
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that records written 'data/' paths in the fncache
    and encodes every path before handing it to the wrapped opener'''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # remember the unencoded name of anything opened for writing
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
379 379
class fncachestore(basicstore):
    '''store located in <path>/store that tracks its data files in an
    fncache and encodes names with the given encode function'''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        op = openertype(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc
        self.opener = _fncacheopener(op, fnc, encode)

    def join(self, f):
        '''return the path of f inside the store'''
        return self.path + '/' + self.encode(f)

    def getsize(self, path):
        '''return the size of the (already encoded) store file path

        Raises OSError if the file cannot be stat'ed.
        '''
        return os.stat(self.path + '/' + path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for every fncache entry whose
        file exists; entries without a file are dropped from the fncache
        once the iteration has finished'''
        rewrite = False
        existing = []
        for f in self.fncache:
            ef = self.encode(f)
            # only the size lookup is guarded, so errors raised elsewhere
            # are never mistaken for a missing file
            try:
                size = self.getsize(ef)
            except OSError:
                # nonexistent entry
                rewrite = True
                continue
            yield f, ef, size
            existing.append(f)
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''return the names to copy, relative to the directory above the
        store'''
        d = ('data dh fncache phaseroots'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self):
        '''write out the fncache'''
        self.fncache.write()
420 421
def store(requirements, path, openertype):
    '''return the store object matching the repository requirements

    No 'store' requirement gives a basicstore, 'store' alone an
    encodedstore, and 'store' plus 'fncache' a fncachestore whose encoder
    honours the 'dotencode' requirement.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # 'dotencode' is looked up on every call, not once up front
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now