##// END OF EJS Templates
store: speed up read and write of large fncache files...
Bryan O'Sullivan -
r16404:9fca5b05 default
parent child Browse files
Show More
@@ -1,427 +1,427 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''escape directory names below data/ that end in .i, .d or .hg

    Paths that do not start with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if not path.startswith('data/'):
        return path
    # '.hg/' has to be handled first: the two later substitutions
    # introduce new '.hg/' sequences that must not be escaped again
    for suffix in ('.hg/', '.i/', '.d/'):
        path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
31 31
def decodedir(path):
    '''undo the directory escaping applied by encodedir

    Paths that do not start with 'data/' or that contain no '.hg/' are
    returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if path.startswith('data/') and ".hg/" in path:
        # reverse order of encodedir: '.hg.hg/' is unescaped last
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
47 47
48 48 def _buildencodefun():
49 49 '''
50 50 >>> enc, dec = _buildencodefun()
51 51
52 52 >>> enc('nothing/special.txt')
53 53 'nothing/special.txt'
54 54 >>> dec('nothing/special.txt')
55 55 'nothing/special.txt'
56 56
57 57 >>> enc('HELLO')
58 58 '_h_e_l_l_o'
59 59 >>> dec('_h_e_l_l_o')
60 60 'HELLO'
61 61
62 62 >>> enc('hello:world?')
63 63 'hello~3aworld~3f'
64 64 >>> dec('hello~3aworld~3f')
65 65 'hello:world?'
66 66
67 67 >>> enc('the\x07quick\xADshot')
68 68 'the~07quick~adshot'
69 69 >>> dec('the~07quick~adshot')
70 70 'the\\x07quick\\xadshot'
71 71 '''
72 72 e = '_'
73 73 winreserved = [ord(x) for x in '\\:*?"<>|']
74 74 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
75 75 for x in (range(32) + range(126, 256) + winreserved):
76 76 cmap[chr(x)] = "~%02x" % x
77 77 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
78 78 cmap[chr(x)] = e + chr(x).lower()
79 79 dmap = {}
80 80 for k, v in cmap.iteritems():
81 81 dmap[v] = k
82 82 def decode(s):
83 83 i = 0
84 84 while i < len(s):
85 85 for l in xrange(1, 4):
86 86 try:
87 87 yield dmap[s[i:i + l]]
88 88 i += l
89 89 break
90 90 except KeyError:
91 91 pass
92 92 else:
93 93 raise KeyError
94 94 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
95 95 lambda s: decodedir("".join(list(decode(s)))))
96 96
97 97 encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 121 _winreservednames = '''con prn aux nul
122 122 com1 com2 com3 com4 com5 com6 com7 com8 com9
123 123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 130 True.
131 131 path is assumed to be all lowercase.
132 132
133 133 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
134 134 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
135 135 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/foo.', False)
136 136 '.com1com2/lp~749.lpt4.lpt1/conprn/foo~2e'
137 137 >>> _auxencode('foo. ', True)
138 138 'foo.~20'
139 139 >>> _auxencode(' .foo', True)
140 140 '~20.foo'
141 141 '''
142 142 res = []
143 143 for n in path.split('/'):
144 144 if n:
145 145 base = n.split('.')[0]
146 146 if base and (base in _winreservednames):
147 147 # encode third letter ('aux' -> 'au~78')
148 148 ec = "~%02x" % ord(n[2])
149 149 n = n[0:2] + ec + n[3:]
150 150 if n[-1] in '. ':
151 151 # encode last period or space ('foo...' -> 'foo..~2e')
152 152 n = n[:-1] + "~%02x" % ord(n[-1])
153 153 if dotencode and n[0] in '. ':
154 154 n = "~%02x" % ord(n[0]) + n[1:]
155 155 res.append(n)
156 156 return '/'.join(res)
157 157
158 158 _maxstorepathlen = 120
159 159 _dirprefixlen = 8
160 160 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
161 161 def _hybridencode(path, auxencode):
162 162 '''encodes path with a length limit
163 163
164 164 Encodes all paths that begin with 'data/', according to the following.
165 165
166 166 Default encoding (reversible):
167 167
168 168 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
169 169 characters are encoded as '~xx', where xx is the two digit hex code
170 170 of the character (see encodefilename).
171 171 Relevant path components consisting of Windows reserved filenames are
172 172 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
173 173
174 174 Hashed encoding (not reversible):
175 175
176 176 If the default-encoded path is longer than _maxstorepathlen, a
177 177 non-reversible hybrid hashing of the path is done instead.
178 178 This encoding uses up to _dirprefixlen characters of all directory
179 179 levels of the lowerencoded path, but not more levels than can fit into
180 180 _maxshortdirslen.
181 181 Then follows the filler followed by the sha digest of the full path.
182 182 The filler is the beginning of the basename of the lowerencoded path
183 183 (the basename is everything after the last path separator). The filler
184 184 is as long as possible, filling in characters from the basename until
185 185 the encoded path has _maxstorepathlen characters (or all chars of the
186 186 basename have been taken).
187 187 The extension (e.g. '.i' or '.d') is preserved.
188 188
189 189 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
190 190 encoding was used.
191 191 '''
192 192 if not path.startswith('data/'):
193 193 return path
194 194 # escape directories ending with .i and .d
195 195 path = encodedir(path)
196 196 ndpath = path[len('data/'):]
197 197 res = 'data/' + auxencode(encodefilename(ndpath))
198 198 if len(res) > _maxstorepathlen:
199 199 digest = _sha(path).hexdigest()
200 200 aep = auxencode(lowerencode(ndpath))
201 201 _root, ext = os.path.splitext(aep)
202 202 parts = aep.split('/')
203 203 basename = parts[-1]
204 204 sdirs = []
205 205 for p in parts[:-1]:
206 206 d = p[:_dirprefixlen]
207 207 if d[-1] in '. ':
208 208 # Windows can't access dirs ending in period or space
209 209 d = d[:-1] + '_'
210 210 t = '/'.join(sdirs) + '/' + d
211 211 if len(t) > _maxshortdirslen:
212 212 break
213 213 sdirs.append(d)
214 214 dirs = '/'.join(sdirs)
215 215 if len(dirs) > 0:
216 216 dirs += '/'
217 217 res = 'dh/' + dirs + digest + ext
218 218 spaceleft = _maxstorepathlen - len(res)
219 219 if spaceleft > 0:
220 220 filler = basename[:spaceleft]
221 221 res = 'dh/' + dirs + filler + digest + ext
222 222 return res
223 223
224 224 def _calcmode(path):
225 225 try:
226 226 # files in .hg/ will be created using this mode
227 227 mode = os.stat(path).st_mode
228 228 # avoid some useless chmods
229 229 if (0777 & ~util.umask) == (0777 & mode):
230 230 mode = None
231 231 except OSError:
232 232 mode = None
233 233 return mode
234 234
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i phaseroots'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        mode = _calcmode(path)
        self.createmode = mode
        baseopener = openertype(path)
        baseopener.createmode = mode
        self.opener = scmutil.filteropener(baseopener, encodedir)

    def join(self, f):
        '''returns the path of f inside the store'''
        return '/'.join([self.path, encodedir(f)])

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in .d or .i below relpath are listed;
        subdirectories are visited only if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        if not os.path.isdir(root):
            return []
        # length of the store path plus separator, cut off every result
        striplen = len(self.path) + 1
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFDIR:
                    if recurse:
                        pending.append(full)
                elif kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
        found.sort()
        return found

    def datafiles(self):
        '''returns the entries found below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''returns the names listed in _data, preceded by 'requires' '''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this kind of store'''
        pass
286 286
class encodedstore(basicstore):
    '''store kept in <path>/store, with names run through encodefilename'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        mode = _calcmode(self.path)
        self.createmode = mode
        baseopener = openertype(self.path)
        baseopener.createmode = mode
        self.opener = scmutil.filteropener(baseopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for names that decodefilename cannot decode.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''returns the path of f inside the store'''
        return '/'.join([self.path, encodefilename(f)])

    def copylist(self):
        '''returns the names to copy, relative to the parent of the store'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append('store/' + f)
        return names
309 309
class fncache(object):
    '''lazily loaded, in-memory copy of the 'fncache' file

    entries is None until the file has been read; afterwards it is the
    set of names listed in it. _dirty is set when add() inserts a name
    that was not present and is cleared by loading and writing.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        If the file cannot be opened the entries are left empty.
        Raises util.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            # fast path: read and split the whole file in one go
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # something is wrong; rescan line by line so that the
                # number of the offending line can be reported
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # do not leak the file handle when aborting
            fp.close()

    def _write(self, files, atomictemp):
        '''write files to the fncache file and clear the dirty flag

        atomictemp is handed to the opener unchanged.
        '''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            # a single write of the joined names, one per line
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file contents and the entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if any were added since the last write'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, loading the file first if necessary'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
367 367
class _fncacheopener(scmutil.abstractopener):
    '''opener that adds data/ paths opened for writing to the fncache'''
    def __init__(self, op, fnc, encode):
        self.opener, self.fncache, self.encode = op, fnc, encode

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
378 378
class fncachestore(basicstore):
    '''store kept in <path>/store that tracks its data files in an fncache'''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        self.path = path + '/store'
        mode = _calcmode(self.path)
        self.createmode = mode
        baseopener = openertype(self.path)
        baseopener.createmode = mode
        self.fncache = fncache(baseopener)
        self.opener = _fncacheopener(baseopener, self.fncache, encode)

    def join(self, f):
        '''returns the path of f inside the store'''
        return '/'.join([self.path, self.encode(f)])

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry

        Entries that cannot be stat'ed are skipped; if there were any,
        the fncache is rewritten without them once iteration is done.
        '''
        present = []
        stale = False
        prefix = self.path + '/'
        for name in self.fncache:
            encoded = self.encode(name)
            try:
                size = os.stat(prefix + encoded).st_size
                yield name, encoded, size
                present.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(present)

    def copylist(self):
        '''returns the names to copy, relative to the parent of the store'''
        d = ('data dh fncache phaseroots'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        names = ['requires', '00changelog.i']
        for f in d.split():
            names.append('store/' + f)
        return names

    def write(self):
        '''write out the fncache'''
        self.fncache.write()
419 419
def store(requirements, path, openertype):
    '''create the store object matching the given requirements

    Without 'store' a basicstore is returned, with 'store' alone an
    encodedstore, and with both 'store' and 'fncache' a fncachestore
    (whose auxiliary encoding honours 'dotencode').
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # requirements is consulted on every call, not just once here
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now