##// END OF EJS Templates
store: optimize _auxencode() by assigning to the list elements of the path
Adrian Buehlmann -
r17571:7ed972a9 default
parent child Browse files
Show More
@@ -1,448 +1,450 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def encodedir(path):
    '''
    Escape directory components of a 'data/' path that end in '.hg',
    '.i' or '.d' by appending '.hg' to them. Paths that do not start
    with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if not path.startswith('data/'):
        return path
    # '.hg/' has to be handled first, otherwise the '.hg' suffixes added
    # for '.i/' and '.d/' would be escaped a second time
    for suffix in ('.hg/', '.i/', '.d/'):
        path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
31 31
def decodedir(path):
    '''
    Undo encodedir(): strip the '.hg' suffix that was appended to
    directory components ending in '.d', '.i' or '.hg'. Paths that do
    not start with 'data/' are returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if not path.startswith('data/'):
        return path
    if ".hg/" not in path:
        # nothing was escaped, skip the replace calls
        return path
    # reverse order of encodedir(): '.hg.hg/' is unescaped last
    unescapes = (
        (".d.hg/", ".d/"),
        (".i.hg/", ".i/"),
        (".hg.hg/", ".hg/"),
    )
    for escaped, plain in unescapes:
        path = path.replace(escaped, plain)
    return path
47 47
48 48 def _buildencodefun():
49 49 '''
50 50 >>> enc, dec = _buildencodefun()
51 51
52 52 >>> enc('nothing/special.txt')
53 53 'nothing/special.txt'
54 54 >>> dec('nothing/special.txt')
55 55 'nothing/special.txt'
56 56
57 57 >>> enc('HELLO')
58 58 '_h_e_l_l_o'
59 59 >>> dec('_h_e_l_l_o')
60 60 'HELLO'
61 61
62 62 >>> enc('hello:world?')
63 63 'hello~3aworld~3f'
64 64 >>> dec('hello~3aworld~3f')
65 65 'hello:world?'
66 66
67 67 >>> enc('the\x07quick\xADshot')
68 68 'the~07quick~adshot'
69 69 >>> dec('the~07quick~adshot')
70 70 'the\\x07quick\\xadshot'
71 71 '''
72 72 e = '_'
73 73 winreserved = [ord(x) for x in '\\:*?"<>|']
74 74 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
75 75 for x in (range(32) + range(126, 256) + winreserved):
76 76 cmap[chr(x)] = "~%02x" % x
77 77 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
78 78 cmap[chr(x)] = e + chr(x).lower()
79 79 dmap = {}
80 80 for k, v in cmap.iteritems():
81 81 dmap[v] = k
82 82 def decode(s):
83 83 i = 0
84 84 while i < len(s):
85 85 for l in xrange(1, 4):
86 86 try:
87 87 yield dmap[s[i:i + l]]
88 88 i += l
89 89 break
90 90 except KeyError:
91 91 pass
92 92 else:
93 93 raise KeyError
94 94 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
95 95 lambda s: decodedir("".join(list(decode(s)))))
96 96
97 97 encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 121 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
122 122 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
123 123 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 130 True. Parameter path is assumed to be all lowercase.
131 131 A segment only needs encoding if a reserved name appears as a
132 132 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
133 133 doesn't need encoding.
134 134
135 135 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
136 136 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
137 137 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
138 138 '.com1com2/lp~749.lpt4.lpt1/conprn/com0/lpt0/foo~2e'
139 139 >>> _auxencode('foo. ', True)
140 140 'foo.~20'
141 141 >>> _auxencode(' .foo', True)
142 142 '~20.foo'
143 143 '''
144 res = []
145 for n in path.split('/'):
144 res = path.split('/')
145 for i, n in enumerate(res):
146 146 if n:
147 147 if dotencode and n[0] in '. ':
148 148 n = "~%02x" % ord(n[0]) + n[1:]
149 res[i] = n
149 150 else:
150 151 l = n.find('.')
151 152 if l == -1:
152 153 l = len(n)
153 154 if ((l == 3 and n[:3] in _winres3) or
154 155 (l == 4 and n[3] <= '9' and n[3] >= '1'
155 156 and n[:3] in _winres4)):
156 157 # encode third letter ('aux' -> 'au~78')
157 158 ec = "~%02x" % ord(n[2])
158 159 n = n[0:2] + ec + n[3:]
160 res[i] = n
159 161 if n[-1] in '. ':
160 162 # encode last period or space ('foo...' -> 'foo..~2e')
161 163 n = n[:-1] + "~%02x" % ord(n[-1])
162 res.append(n)
164 res[i] = n
163 165 return '/'.join(res)
164 166
165 167 _maxstorepathlen = 120
166 168 _dirprefixlen = 8
167 169 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
168 170 def _hybridencode(path, auxencode):
169 171 '''encodes path with a length limit
170 172
171 173 Encodes all paths that begin with 'data/', according to the following.
172 174
173 175 Default encoding (reversible):
174 176
175 177 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
176 178 characters are encoded as '~xx', where xx is the two digit hex code
177 179 of the character (see encodefilename).
178 180 Relevant path components consisting of Windows reserved filenames are
179 181 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
180 182
181 183 Hashed encoding (not reversible):
182 184
183 185 If the default-encoded path is longer than _maxstorepathlen, a
184 186 non-reversible hybrid hashing of the path is done instead.
185 187 This encoding uses up to _dirprefixlen characters of all directory
186 188 levels of the lowerencoded path, but not more levels than can fit into
187 189 _maxshortdirslen.
188 190 Then follows the filler followed by the sha digest of the full path.
189 191 The filler is the beginning of the basename of the lowerencoded path
190 192 (the basename is everything after the last path separator). The filler
191 193 is as long as possible, filling in characters from the basename until
192 194 the encoded path has _maxstorepathlen characters (or all chars of the
193 195 basename have been taken).
194 196 The extension (e.g. '.i' or '.d') is preserved.
195 197
196 198 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
197 199 encoding was used.
198 200 '''
199 201 if not path.startswith('data/'):
200 202 return path
201 203 # escape directories ending with .i and .d
202 204 path = encodedir(path)
203 205 ndpath = path[len('data/'):]
204 206 res = 'data/' + auxencode(encodefilename(ndpath))
205 207 if len(res) > _maxstorepathlen:
206 208 digest = _sha(path).hexdigest()
207 209 aep = auxencode(lowerencode(ndpath))
208 210 _root, ext = os.path.splitext(aep)
209 211 parts = aep.split('/')
210 212 basename = parts[-1]
211 213 sdirs = []
212 214 for p in parts[:-1]:
213 215 d = p[:_dirprefixlen]
214 216 if d[-1] in '. ':
215 217 # Windows can't access dirs ending in period or space
216 218 d = d[:-1] + '_'
217 219 t = '/'.join(sdirs) + '/' + d
218 220 if len(t) > _maxshortdirslen:
219 221 break
220 222 sdirs.append(d)
221 223 dirs = '/'.join(sdirs)
222 224 if len(dirs) > 0:
223 225 dirs += '/'
224 226 res = 'dh/' + dirs + digest + ext
225 227 spaceleft = _maxstorepathlen - len(res)
226 228 if spaceleft > 0:
227 229 filler = basename[:spaceleft]
228 230 res = 'dh/' + dirs + filler + digest + ext
229 231 return res
230 232
231 233 def _calcmode(path):
232 234 try:
233 235 # files in .hg/ will be created using this mode
234 236 mode = os.stat(path).st_mode
235 237 # avoid some useless chmods
236 238 if (0777 & ~util.umask) == (0777 & mode):
237 239 mode = None
238 240 except OSError:
239 241 mode = None
240 242 return mode
241 243
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # every filename passes through encodedir() before being opened
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Lists the regular files ending in '.d' or '.i' below
        self.path/relpath, descending into subdirectories only if
        recurse is true.
        '''
        top = self.path
        if relpath:
            top += '/' + relpath
        # names are reported relative to self.path
        striplen = len(self.path) + 1
        found = []
        if not os.path.isdir(top):
            return found
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the _walk() entries found below 'data' '''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return 'requires' followed by the names listed in _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
295 297
class encodedstore(basicstore):
    '''store kept in <path>/store whose filenames go through
    encodefilename()'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        unencoded is None for a name that decodefilename() rejects.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                # not a valid encoded name
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return the names to copy, with _data entries under 'store/' '''
        storefiles = ['store/' + name for name in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
318 320
class fncache(object):
    '''lazily loaded set of the filenames listed in the 'fncache' file

    entries stays None until the first add(), membership test or
    iteration, which loads the file through the opener. _dirty records
    whether entries were added since the last load or write.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set. Raises util.Abort naming the
        first offending line if the file contains an empty entry.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # rescan line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file on the error paths too
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one encodedir()-encoded name per line'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file content and the loaded entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back to the file if any were added'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
376 378
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that encodes paths and records every 'data/' path
    opened for anything but reading in the fncache'''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # remember the unencoded name of the data file
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
395 397
class fncachestore(basicstore):
    '''store kept in <path>/store that encodes names with the given
    encode function and lists its data files in an fncache'''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        self.path = path + '/store'
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(self.path)
        op = openertype(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc
        self.opener = _fncacheopener(op, fnc, encode)

    def join(self, f):
        '''return the encoded path of store file f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the already encoded store file path'''
        return os.stat(self.pathsep + path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry

        Entries whose file does not exist are skipped; if any were
        found, the fncache is rewritten without them once all entries
        have been visited. Other OSErrors are re-raised.
        '''
        rewrite = False
        existing = []
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                yield f, ef, self.getsize(ef)
                existing.append(f)
            # 'as' form: valid on Python 2.6+ and Python 3
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                # nonexistent entry
                rewrite = True
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''return the names to copy, with store files under 'store/' '''
        d = ('data dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self):
        '''write pending fncache additions'''
        self.fncache.write()
440 442
def store(requirements, path, openertype):
    '''return the store object matching the repository requirements

    Without 'store' a basicstore is returned, with 'store' but without
    'fncache' an encodedstore, and with both an fncachestore using the
    hybrid encoding. 'dotencode' is looked up in requirements on each
    encode call.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now