##// END OF EJS Templates
store: optimize _auxencode() a bit by grouping the reserved names by length...
Adrian Buehlmann -
r17570:f53a7b25 default
parent child Browse files
Show More
@@ -1,443 +1,448 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def encodedir(path):
17 17 '''
18 18 >>> encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 '''
25 25 if not path.startswith('data/'):
26 26 return path
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
32 32 def decodedir(path):
33 33 '''
34 34 >>> decodedir('data/foo.i')
35 35 'data/foo.i'
36 36 >>> decodedir('data/foo.i.hg/bla.i')
37 37 'data/foo.i/bla.i'
38 38 >>> decodedir('data/foo.i.hg.hg/bla.i')
39 39 'data/foo.i.hg/bla.i'
40 40 '''
41 41 if not path.startswith('data/') or ".hg/" not in path:
42 42 return path
43 43 return (path
44 44 .replace(".d.hg/", ".d/")
45 45 .replace(".i.hg/", ".i/")
46 46 .replace(".hg.hg/", ".hg/"))
47 47
48 48 def _buildencodefun():
49 49 '''
50 50 >>> enc, dec = _buildencodefun()
51 51
52 52 >>> enc('nothing/special.txt')
53 53 'nothing/special.txt'
54 54 >>> dec('nothing/special.txt')
55 55 'nothing/special.txt'
56 56
57 57 >>> enc('HELLO')
58 58 '_h_e_l_l_o'
59 59 >>> dec('_h_e_l_l_o')
60 60 'HELLO'
61 61
62 62 >>> enc('hello:world?')
63 63 'hello~3aworld~3f'
64 64 >>> dec('hello~3aworld~3f')
65 65 'hello:world?'
66 66
67 67 >>> enc('the\x07quick\xADshot')
68 68 'the~07quick~adshot'
69 69 >>> dec('the~07quick~adshot')
70 70 'the\\x07quick\\xadshot'
71 71 '''
72 72 e = '_'
73 73 winreserved = [ord(x) for x in '\\:*?"<>|']
74 74 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
75 75 for x in (range(32) + range(126, 256) + winreserved):
76 76 cmap[chr(x)] = "~%02x" % x
77 77 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
78 78 cmap[chr(x)] = e + chr(x).lower()
79 79 dmap = {}
80 80 for k, v in cmap.iteritems():
81 81 dmap[v] = k
82 82 def decode(s):
83 83 i = 0
84 84 while i < len(s):
85 85 for l in xrange(1, 4):
86 86 try:
87 87 yield dmap[s[i:i + l]]
88 88 i += l
89 89 break
90 90 except KeyError:
91 91 pass
92 92 else:
93 93 raise KeyError
94 94 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
95 95 lambda s: decodedir("".join(list(decode(s)))))
96 96
97 97 encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 _winreservednames = '''con prn aux nul
122 com1 com2 com3 com4 com5 com6 com7 com8 com9
123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
121 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
122 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
123 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 130 True. Parameter path is assumed to be all lowercase.
131 131 A segment only needs encoding if a reserved name appears as a
132 132 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
133 133 doesn't need encoding.
134 134
135 135 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
136 136 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
137 137 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
138 138 '.com1com2/lp~749.lpt4.lpt1/conprn/com0/lpt0/foo~2e'
139 139 >>> _auxencode('foo. ', True)
140 140 'foo.~20'
141 141 >>> _auxencode(' .foo', True)
142 142 '~20.foo'
143 143 '''
144 144 res = []
145 145 for n in path.split('/'):
146 146 if n:
147 base = n.split('.')[0]
148 if base and (base in _winreservednames):
149 # encode third letter ('aux' -> 'au~78')
150 ec = "~%02x" % ord(n[2])
151 n = n[0:2] + ec + n[3:]
147 if dotencode and n[0] in '. ':
148 n = "~%02x" % ord(n[0]) + n[1:]
149 else:
150 l = n.find('.')
151 if l == -1:
152 l = len(n)
153 if ((l == 3 and n[:3] in _winres3) or
154 (l == 4 and n[3] <= '9' and n[3] >= '1'
155 and n[:3] in _winres4)):
156 # encode third letter ('aux' -> 'au~78')
157 ec = "~%02x" % ord(n[2])
158 n = n[0:2] + ec + n[3:]
152 159 if n[-1] in '. ':
153 160 # encode last period or space ('foo...' -> 'foo..~2e')
154 161 n = n[:-1] + "~%02x" % ord(n[-1])
155 if dotencode and n[0] in '. ':
156 n = "~%02x" % ord(n[0]) + n[1:]
157 162 res.append(n)
158 163 return '/'.join(res)
159 164
160 165 _maxstorepathlen = 120
161 166 _dirprefixlen = 8
162 167 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
163 168 def _hybridencode(path, auxencode):
164 169 '''encodes path with a length limit
165 170
166 171 Encodes all paths that begin with 'data/', according to the following.
167 172
168 173 Default encoding (reversible):
169 174
170 175 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
171 176 characters are encoded as '~xx', where xx is the two digit hex code
172 177 of the character (see encodefilename).
173 178 Relevant path components consisting of Windows reserved filenames are
174 179 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
175 180
176 181 Hashed encoding (not reversible):
177 182
178 183 If the default-encoded path is longer than _maxstorepathlen, a
179 184 non-reversible hybrid hashing of the path is done instead.
180 185 This encoding uses up to _dirprefixlen characters of all directory
181 186 levels of the lowerencoded path, but not more levels than can fit into
182 187 _maxshortdirslen.
183 188 Then follows the filler followed by the sha digest of the full path.
184 189 The filler is the beginning of the basename of the lowerencoded path
185 190 (the basename is everything after the last path separator). The filler
186 191 is as long as possible, filling in characters from the basename until
187 192 the encoded path has _maxstorepathlen characters (or all chars of the
188 193 basename have been taken).
189 194 The extension (e.g. '.i' or '.d') is preserved.
190 195
191 196 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
192 197 encoding was used.
193 198 '''
194 199 if not path.startswith('data/'):
195 200 return path
196 201 # escape directories ending with .i and .d
197 202 path = encodedir(path)
198 203 ndpath = path[len('data/'):]
199 204 res = 'data/' + auxencode(encodefilename(ndpath))
200 205 if len(res) > _maxstorepathlen:
201 206 digest = _sha(path).hexdigest()
202 207 aep = auxencode(lowerencode(ndpath))
203 208 _root, ext = os.path.splitext(aep)
204 209 parts = aep.split('/')
205 210 basename = parts[-1]
206 211 sdirs = []
207 212 for p in parts[:-1]:
208 213 d = p[:_dirprefixlen]
209 214 if d[-1] in '. ':
210 215 # Windows can't access dirs ending in period or space
211 216 d = d[:-1] + '_'
212 217 t = '/'.join(sdirs) + '/' + d
213 218 if len(t) > _maxshortdirslen:
214 219 break
215 220 sdirs.append(d)
216 221 dirs = '/'.join(sdirs)
217 222 if len(dirs) > 0:
218 223 dirs += '/'
219 224 res = 'dh/' + dirs + digest + ext
220 225 spaceleft = _maxstorepathlen - len(res)
221 226 if spaceleft > 0:
222 227 filler = basename[:spaceleft]
223 228 res = 'dh/' + dirs + filler + digest + ext
224 229 return res
225 230
226 231 def _calcmode(path):
227 232 try:
228 233 # files in .hg/ will be created using this mode
229 234 mode = os.stat(path).st_mode
230 235 # avoid some useless chmods
231 236 if (0777 & ~util.umask) == (0777 & mode):
232 237 mode = None
233 238 except OSError:
234 239 mode = None
235 240 return mode
236 241
237 242 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
238 243 ' phaseroots obsstore')
239 244
240 245 class basicstore(object):
241 246 '''base class for local repository stores'''
242 247 def __init__(self, path, openertype):
243 248 self.path = path
244 249 self.createmode = _calcmode(path)
245 250 op = openertype(self.path)
246 251 op.createmode = self.createmode
247 252 self.opener = scmutil.filteropener(op, encodedir)
248 253
249 254 def join(self, f):
250 255 return self.path + '/' + encodedir(f)
251 256
252 257 def _walk(self, relpath, recurse):
253 258 '''yields (unencoded, encoded, size)'''
254 259 path = self.path
255 260 if relpath:
256 261 path += '/' + relpath
257 262 striplen = len(self.path) + 1
258 263 l = []
259 264 if os.path.isdir(path):
260 265 visit = [path]
261 266 while visit:
262 267 p = visit.pop()
263 268 for f, kind, st in osutil.listdir(p, stat=True):
264 269 fp = p + '/' + f
265 270 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
266 271 n = util.pconvert(fp[striplen:])
267 272 l.append((decodedir(n), n, st.st_size))
268 273 elif kind == stat.S_IFDIR and recurse:
269 274 visit.append(fp)
270 275 l.sort()
271 276 return l
272 277
273 278 def datafiles(self):
274 279 return self._walk('data', True)
275 280
276 281 def walk(self):
277 282 '''yields (unencoded, encoded, size)'''
278 283 # yield data files first
279 284 for x in self.datafiles():
280 285 yield x
281 286 # yield manifest before changelog
282 287 for x in reversed(self._walk('', False)):
283 288 yield x
284 289
285 290 def copylist(self):
286 291 return ['requires'] + _data.split()
287 292
288 293 def write(self):
289 294 pass
290 295
291 296 class encodedstore(basicstore):
292 297 def __init__(self, path, openertype):
293 298 self.path = path + '/store'
294 299 self.createmode = _calcmode(self.path)
295 300 op = openertype(self.path)
296 301 op.createmode = self.createmode
297 302 self.opener = scmutil.filteropener(op, encodefilename)
298 303
299 304 def datafiles(self):
300 305 for a, b, size in self._walk('data', True):
301 306 try:
302 307 a = decodefilename(a)
303 308 except KeyError:
304 309 a = None
305 310 yield a, b, size
306 311
307 312 def join(self, f):
308 313 return self.path + '/' + encodefilename(f)
309 314
310 315 def copylist(self):
311 316 return (['requires', '00changelog.i'] +
312 317 ['store/' + f for f in _data.split()])
313 318
314 319 class fncache(object):
315 320 # the filename used to be partially encoded
316 321 # hence the encodedir/decodedir dance
317 322 def __init__(self, opener):
318 323 self.opener = opener
319 324 self.entries = None
320 325 self._dirty = False
321 326
322 327 def _load(self):
323 328 '''fill the entries from the fncache file'''
324 329 self._dirty = False
325 330 try:
326 331 fp = self.opener('fncache', mode='rb')
327 332 except IOError:
328 333 # skip nonexistent file
329 334 self.entries = set()
330 335 return
331 336 self.entries = set(map(decodedir, fp.read().splitlines()))
332 337 if '' in self.entries:
333 338 fp.seek(0)
334 339 for n, line in enumerate(fp):
335 340 if not line.rstrip('\n'):
336 341 t = _('invalid entry in fncache, line %s') % (n + 1)
337 342 raise util.Abort(t)
338 343 fp.close()
339 344
340 345 def _write(self, files, atomictemp):
341 346 fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
342 347 if files:
343 348 fp.write('\n'.join(map(encodedir, files)) + '\n')
344 349 fp.close()
345 350 self._dirty = False
346 351
347 352 def rewrite(self, files):
348 353 self._write(files, False)
349 354 self.entries = set(files)
350 355
351 356 def write(self):
352 357 if self._dirty:
353 358 self._write(self.entries, True)
354 359
355 360 def add(self, fn):
356 361 if self.entries is None:
357 362 self._load()
358 363 if fn not in self.entries:
359 364 self._dirty = True
360 365 self.entries.add(fn)
361 366
362 367 def __contains__(self, fn):
363 368 if self.entries is None:
364 369 self._load()
365 370 return fn in self.entries
366 371
367 372 def __iter__(self):
368 373 if self.entries is None:
369 374 self._load()
370 375 return iter(self.entries)
371 376
372 377 class _fncacheopener(scmutil.abstractopener):
373 378 def __init__(self, op, fnc, encode):
374 379 self.opener = op
375 380 self.fncache = fnc
376 381 self.encode = encode
377 382
378 383 def _getmustaudit(self):
379 384 return self.opener.mustaudit
380 385
381 386 def _setmustaudit(self, onoff):
382 387 self.opener.mustaudit = onoff
383 388
384 389 mustaudit = property(_getmustaudit, _setmustaudit)
385 390
386 391 def __call__(self, path, mode='r', *args, **kw):
387 392 if mode not in ('r', 'rb') and path.startswith('data/'):
388 393 self.fncache.add(path)
389 394 return self.opener(self.encode(path), mode, *args, **kw)
390 395
391 396 class fncachestore(basicstore):
392 397 def __init__(self, path, openertype, encode):
393 398 self.encode = encode
394 399 self.path = path + '/store'
395 400 self.pathsep = self.path + '/'
396 401 self.createmode = _calcmode(self.path)
397 402 op = openertype(self.path)
398 403 op.createmode = self.createmode
399 404 fnc = fncache(op)
400 405 self.fncache = fnc
401 406 self.opener = _fncacheopener(op, fnc, encode)
402 407
403 408 def join(self, f):
404 409 return self.pathsep + self.encode(f)
405 410
406 411 def getsize(self, path):
407 412 return os.stat(self.pathsep + path).st_size
408 413
409 414 def datafiles(self):
410 415 rewrite = False
411 416 existing = []
412 417 for f in sorted(self.fncache):
413 418 ef = self.encode(f)
414 419 try:
415 420 yield f, ef, self.getsize(ef)
416 421 existing.append(f)
417 422 except OSError, err:
418 423 if err.errno != errno.ENOENT:
419 424 raise
420 425 # nonexistent entry
421 426 rewrite = True
422 427 if rewrite:
423 428 # rewrite fncache to remove nonexistent entries
424 429 # (may be caused by rollback / strip)
425 430 self.fncache.rewrite(existing)
426 431
427 432 def copylist(self):
428 433 d = ('data dh fncache phaseroots obsstore'
429 434 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
430 435 return (['requires', '00changelog.i'] +
431 436 ['store/' + f for f in d.split()])
432 437
433 438 def write(self):
434 439 self.fncache.write()
435 440
436 441 def store(requirements, path, openertype):
437 442 if 'store' in requirements:
438 443 if 'fncache' in requirements:
439 444 auxencode = lambda f: _auxencode(f, 'dotencode' in requirements)
440 445 encode = lambda f: _hybridencode(f, auxencode)
441 446 return fncachestore(path, openertype, encode)
442 447 return encodedstore(path, openertype)
443 448 return basicstore(path, openertype)
General Comments 0
You need to be logged in to leave comments. Login now