##// END OF EJS Templates
store: optimize fncache._write by direncoding the contents in one go...
Adrian Buehlmann -
r17592:64c6a0d4 default
parent child Browse files
Show More
@@ -1,459 +1,459 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''Escape directory names that look like revlog files.

    Every path component that is followed by a slash and ends in
    ".hg", ".i" or ".d" gets an extra ".hg" appended.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    # ".hg/" has to be handled first: the two later rules create new
    # ".hg/" suffixes that must not be escaped a second time
    for suffix in (".hg/", ".i/", ".d/"):
        path = path.replace(suffix, suffix[:-1] + ".hg/")
    return path
29 29
def decodedir(path):
    '''Undo the directory escaping: strip the ".hg" that follows a
    ".d", ".i" or ".hg" directory suffix.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # every escaped form contains ".hg/", so its absence means there is
    # nothing to undo
    if ".hg/" in path:
        unescape = ((".d.hg/", ".d/"),
                    (".i.hg/", ".i/"),
                    (".hg.hg/", ".hg/"))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
45 45
def _buildencodefun():
    '''Build and return the (encode, decode) pair for store filenames.

    The encoder first applies encodedir(), then maps each character:
    control characters, characters >= 126 and Windows reserved characters
    become '~xx' (two hex digits), uppercase letters and '_' become '_'
    followed by the lowercased character, everything else is kept.
    The decoder reverses the mapping and then applies decodedir(); it
    raises KeyError on input that is not a valid encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'

    # start from the identity mapping for the 7-bit range ...
    encmap = {}
    for code in range(127):
        encmap[chr(code)] = chr(code)
    # ... then hex-escape everything that is unsafe in a filename ...
    hexcodes = list(range(32)) + list(range(126, 256))
    hexcodes.extend([ord(c) for c in '\\:*?"<>|'])
    for code in hexcodes:
        encmap[chr(code)] = "~%02x" % code
    # ... and finally mark uppercase letters (and the marker itself)
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        encmap[chr(code)] = escape + chr(code).lower()

    decmap = dict([(enc, plain) for plain, enc in encmap.items()])

    def decode(s):
        # encoded tokens are 1 ('a'), 2 ('_a') or 3 ('~7e') characters
        # long; take the shortest one that matches at each position
        pos = 0
        while pos < len(s):
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in decmap:
                    yield decmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encodename(s):
        return "".join([encmap[c] for c in encodedir(s)])

    def decodename(s):
        return decodedir("".join([c for c in decode(s)]))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
96 96
97 97 def _buildlowerencodefun():
98 98 '''
99 99 >>> f = _buildlowerencodefun()
100 100 >>> f('nothing/special.txt')
101 101 'nothing/special.txt'
102 102 >>> f('HELLO')
103 103 'hello'
104 104 >>> f('hello:world?')
105 105 'hello~3aworld~3f'
106 106 >>> f('the\x07quick\xADshot')
107 107 'the~07quick~adshot'
108 108 '''
109 109 winreserved = [ord(x) for x in '\\:*?"<>|']
110 110 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
111 111 for x in (range(32) + range(126, 256) + winreserved):
112 112 cmap[chr(x)] = "~%02x" % x
113 113 for x in range(ord("A"), ord("Z")+1):
114 114 cmap[chr(x)] = chr(x).lower()
115 115 return lambda s: "".join([cmap[c] for c in s])
116 116
117 117 lowerencode = _buildlowerencodefun()
118 118
119 119 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
120 120 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
121 121 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
122 122 def _auxencode(path, dotencode):
123 123 '''
124 124 Encodes filenames containing names reserved by Windows or which end in
125 125 period or space. Does not touch other single reserved characters c.
126 126 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
127 127 Additionally encodes space or period at the beginning, if dotencode is
128 128 True. Parameter path is assumed to be all lowercase.
129 129 A segment only needs encoding if a reserved name appears as a
130 130 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
131 131 doesn't need encoding.
132 132
133 133 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
134 134 >>> _auxencode(s.split('/'), True)
135 135 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
136 136 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
137 137 >>> _auxencode(s.split('/'), False)
138 138 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
139 139 >>> _auxencode(['foo. '], True)
140 140 ['foo.~20']
141 141 >>> _auxencode([' .foo'], True)
142 142 ['~20.foo']
143 143 '''
144 144 for i, n in enumerate(path):
145 145 if not n:
146 146 continue
147 147 if dotencode and n[0] in '. ':
148 148 n = "~%02x" % ord(n[0]) + n[1:]
149 149 path[i] = n
150 150 else:
151 151 l = n.find('.')
152 152 if l == -1:
153 153 l = len(n)
154 154 if ((l == 3 and n[:3] in _winres3) or
155 155 (l == 4 and n[3] <= '9' and n[3] >= '1'
156 156 and n[:3] in _winres4)):
157 157 # encode third letter ('aux' -> 'au~78')
158 158 ec = "~%02x" % ord(n[2])
159 159 n = n[0:2] + ec + n[3:]
160 160 path[i] = n
161 161 if n[-1] in '. ':
162 162 # encode last period or space ('foo...' -> 'foo..~2e')
163 163 path[i] = n[:-1] + "~%02x" % ord(n[-1])
164 164 return path
165 165
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Default encoding (reversible):

    The path is run through encodefilename() (uppercase 'X' -> '_x',
    reserved or illegal characters -> '~xx') and its segments through
    _auxencode() (Windows reserved names are masked by encoding the third
    character, 'aux' -> 'au~78'). If the result is no longer than
    _maxstorepathlen it is returned as is.

    Hashed encoding (not reversible):

    Otherwise the result is built from the lowerencoded path with its
    first segment dropped:

    - 'dh/' as the new prefix;
    - up to _dirprefixlen characters of each directory level, for as many
      levels as fit into _maxshortdirslen (a shortened directory that
      would end in a period or space gets that character replaced
      by '_');
    - a filler: the beginning of the basename, as long as possible
      without the result exceeding _maxstorepathlen;
    - the sha digest of the full (dir-encoded) path;
    - the extension of the basename (e.g. '.i' or '.d').
    '''
    segments = encodefilename(path).split('/')
    res = '/'.join(_auxencode(segments, dotencode))
    if len(res) <= _maxstorepathlen:
        return res

    path = encodedir(path)
    digest = _sha(path).hexdigest()
    parts = _auxencode(lowerencode(path).split('/')[1:], dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    shortlen = 0
    for level in parts[:-1]:
        short = level[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        newlen = len(short)
        if shortlen != 0:
            # account for the separator in front of this level
            newlen += shortlen + 1
            if newlen > _maxshortdirslen:
                break
        shortdirs.append(short)
        shortlen = newlen

    prefix = 'dh/'
    if shortdirs:
        prefix += '/'.join(shortdirs) + '/'
    suffix = digest + ext
    spaceleft = _maxstorepathlen - len(prefix) - len(suffix)
    filler = ''
    if spaceleft > 0:
        filler = basename[:spaceleft]
    return prefix + filler + suffix
233 233
234 234 def _calcmode(path):
235 235 try:
236 236 # files in .hg/ will be created using this mode
237 237 mode = os.stat(path).st_mode
238 238 # avoid some useless chmods
239 239 if (0777 & ~util.umask) == (0777 & mode):
240 240 mode = None
241 241 except OSError:
242 242 mode = None
243 243 return mode
244 244
245 245 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
246 246 ' phaseroots obsstore')
247 247
class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        '''path is used directly as the store directory; openertype is
        called with that directory to create the underlying opener.'''
        mode = _calcmode(path)
        self.path = path
        self.createmode = mode
        rawopener = openertype(path)
        rawopener.createmode = mode
        # file names only get their directory parts escaped
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the on-disk path of store file f'''
        return '%s/%s' % (self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are only visited if recurse is true.
        '''
        root = self.path
        if relpath:
            root = root + '/' + relpath
        # number of leading characters to cut off to get a store-relative
        # name (store path plus the separator)
        striplen = len(self.path) + 1
        found = []
        if not os.path.isdir(root):
            return found
        pending = [root]
        while pending:
            curdir = pending.pop()
            for name, kind, st in osutil.listdir(curdir, stat=True):
                full = curdir + '/' + name
                if kind == stat.S_IFDIR:
                    if recurse:
                        pending.append(full)
                elif kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
        found.sort()
        return found

    def datafiles(self):
        '''returns the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''returns the names to copy: 'requires' plus the _data entries'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write out for this store type'''
        pass
298 298
class encodedstore(basicstore):
    '''store living in <path>/store whose file names are passed through
    encodefilename()'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        mode = _calcmode(storepath)
        self.path = storepath
        self.createmode = mode
        rawopener = openertype(storepath)
        rawopener.createmode = mode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for a file whose name cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return '%s/%s' % (self.path, encodefilename(f))

    def copylist(self):
        '''returns the names to copy, relative to the parent of store/'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append('store/' + f)
        return names
321 321
class fncache(object):
    '''set of file names backed by the "fncache" file of a store

    The file holds one dir-encoded name per line. It is read lazily on
    first use and only written back by write() when an entry was added.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        '''opener is called as opener('fncache', mode=...) to read or
        write the backing file.'''
        self.opener = opener
        # None until the file has been loaded, then a set of names
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file yields an empty set. Raises util.Abort, naming the
        offending line, if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # make sure the handle is released on every exit path, including
        # the Abort below and a failing read
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # re-scan the file only to report the line number
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''write files (an iterable of names) to the fncache file'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            # dir-encode the whole content in one go; the encoded
            # patterns all end in '/', so they cannot span a line break
            fp.write(encodedir('\n'.join(files) + '\n'))
        # NOTE(review): close() is intentionally not moved into a finally
        # block - with atomictemp=True closing presumably publishes the
        # temporary file, which must not happen after a failed write;
        # confirm against the opener implementation
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file content and the in-memory set with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back (atomically) if any were added'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn, marking the cache dirty if it was not yet known'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
379 379
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that encodes paths and records every 'data/' path
    opened for writing in the fncache'''
    def __init__(self, op, fnc, encode):
        self.encode = encode
        self.fncache = fnc
        self.opener = op

    # mustaudit is forwarded to the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        '''open the encoded form of path with the wrapped opener'''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # remember the unencoded name of every data file written
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
398 398
def _plainhybridencode(f):
    '''hybrid-encode f without encoding leading periods or spaces'''
    return _hybridencode(f, dotencode=False)
401 401
def _dothybridencode(f):
    '''hybrid-encode f, also encoding leading periods and spaces'''
    return _hybridencode(f, dotencode=True)
404 404
405 405 class fncachestore(basicstore):
406 406 def __init__(self, path, openertype, dotencode):
407 407 if dotencode:
408 408 encode = _dothybridencode
409 409 else:
410 410 encode = _plainhybridencode
411 411 self.encode = encode
412 412 self.path = path + '/store'
413 413 self.pathsep = self.path + '/'
414 414 self.createmode = _calcmode(self.path)
415 415 op = openertype(self.path)
416 416 op.createmode = self.createmode
417 417 fnc = fncache(op)
418 418 self.fncache = fnc
419 419 self.opener = _fncacheopener(op, fnc, encode)
420 420
421 421 def join(self, f):
422 422 return self.pathsep + self.encode(f)
423 423
424 424 def getsize(self, path):
425 425 return os.stat(self.pathsep + path).st_size
426 426
427 427 def datafiles(self):
428 428 rewrite = False
429 429 existing = []
430 430 for f in sorted(self.fncache):
431 431 ef = self.encode(f)
432 432 try:
433 433 yield f, ef, self.getsize(ef)
434 434 existing.append(f)
435 435 except OSError, err:
436 436 if err.errno != errno.ENOENT:
437 437 raise
438 438 # nonexistent entry
439 439 rewrite = True
440 440 if rewrite:
441 441 # rewrite fncache to remove nonexistent entries
442 442 # (may be caused by rollback / strip)
443 443 self.fncache.rewrite(existing)
444 444
445 445 def copylist(self):
446 446 d = ('data dh fncache phaseroots obsstore'
447 447 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
448 448 return (['requires', '00changelog.i'] +
449 449 ['store/' + f for f in d.split()])
450 450
451 451 def write(self):
452 452 self.fncache.write()
453 453
def store(requirements, path, openertype):
    '''Return the store object matching the repository requirements.

    Without 'store' a basicstore is used; with 'store' but without
    'fncache' an encodedstore; otherwise an fncachestore, which encodes
    leading periods and spaces if 'dotencode' is required as well.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)
    dotencode = 'dotencode' in requirements
    return fncachestore(path, openertype, dotencode)
General Comments 0
You need to be logged in to leave comments. Login now