##// END OF EJS Templates
store: optimize _pathencode by checking the length of the unencoded path...
Adrian Buehlmann -
r17693:0c6de45e default
parent child Browse files
Show More
@@ -1,497 +1,499 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util, parsers
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def _encodedir(path):
17 17 '''
18 18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
# use the encodedir provided by the parsers module when it has one,
# otherwise fall back to the pure Python implementation
try:
    encodedir = parsers.encodedir
except AttributeError:
    encodedir = _encodedir
33 33
def decodedir(path):
    '''Undo the directory escaping: drop the ".hg" inserted before "/".

    Paths that contain no ".hg/" at all are returned untouched.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # ".hg.hg/" is unescaped last, after ".d.hg/" and ".i.hg/"
        unescape = ((".d.hg/", ".d/"),
                    (".i.hg/", ".i/"),
                    (".hg.hg/", ".hg/"))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
49 49
50 50 def _buildencodefun():
51 51 '''
52 52 >>> enc, dec = _buildencodefun()
53 53
54 54 >>> enc('nothing/special.txt')
55 55 'nothing/special.txt'
56 56 >>> dec('nothing/special.txt')
57 57 'nothing/special.txt'
58 58
59 59 >>> enc('HELLO')
60 60 '_h_e_l_l_o'
61 61 >>> dec('_h_e_l_l_o')
62 62 'HELLO'
63 63
64 64 >>> enc('hello:world?')
65 65 'hello~3aworld~3f'
66 66 >>> dec('hello~3aworld~3f')
67 67 'hello:world?'
68 68
69 69 >>> enc('the\x07quick\xADshot')
70 70 'the~07quick~adshot'
71 71 >>> dec('the~07quick~adshot')
72 72 'the\\x07quick\\xadshot'
73 73 '''
74 74 e = '_'
75 75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 77 for x in (range(32) + range(126, 256) + winreserved):
78 78 cmap[chr(x)] = "~%02x" % x
79 79 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
80 80 cmap[chr(x)] = e + chr(x).lower()
81 81 dmap = {}
82 82 for k, v in cmap.iteritems():
83 83 dmap[v] = k
84 84 def decode(s):
85 85 i = 0
86 86 while i < len(s):
87 87 for l in xrange(1, 4):
88 88 try:
89 89 yield dmap[s[i:i + l]]
90 90 i += l
91 91 break
92 92 except KeyError:
93 93 pass
94 94 else:
95 95 raise KeyError
96 96 return (lambda s: ''.join([cmap[c] for c in s]),
97 97 lambda s: ''.join(list(decode(s))))
98 98
99 99 _encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''Escape directory names first, then encode every character.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direscaped = encodedir(s)
    return _encodefname(direscaped)
107 107
def decodefilename(s):
    '''Decode the characters first, then undo the directory escaping.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
114 114
115 115 def _buildlowerencodefun():
116 116 '''
117 117 >>> f = _buildlowerencodefun()
118 118 >>> f('nothing/special.txt')
119 119 'nothing/special.txt'
120 120 >>> f('HELLO')
121 121 'hello'
122 122 >>> f('hello:world?')
123 123 'hello~3aworld~3f'
124 124 >>> f('the\x07quick\xADshot')
125 125 'the~07quick~adshot'
126 126 '''
127 127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 129 for x in (range(32) + range(126, 256) + winreserved):
130 130 cmap[chr(x)] = "~%02x" % x
131 131 for x in range(ord("A"), ord("Z")+1):
132 132 cmap[chr(x)] = chr(x).lower()
133 133 return lambda s: "".join([cmap[c] for c in s])
134 134
135 135 lowerencode = _buildlowerencodefun()
136 136
137 137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 140 def _auxencode(path, dotencode):
141 141 '''
142 142 Encodes filenames containing names reserved by Windows or which end in
143 143 period or space. Does not touch other single reserved characters c.
144 144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 145 Additionally encodes space or period at the beginning, if dotencode is
146 146 True. Parameter path is assumed to be all lowercase.
147 147 A segment only needs encoding if a reserved name appears as a
148 148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 149 doesn't need encoding.
150 150
151 151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 152 >>> _auxencode(s.split('/'), True)
153 153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 155 >>> _auxencode(s.split('/'), False)
156 156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 157 >>> _auxencode(['foo. '], True)
158 158 ['foo.~20']
159 159 >>> _auxencode([' .foo'], True)
160 160 ['~20.foo']
161 161 '''
162 162 for i, n in enumerate(path):
163 163 if not n:
164 164 continue
165 165 if dotencode and n[0] in '. ':
166 166 n = "~%02x" % ord(n[0]) + n[1:]
167 167 path[i] = n
168 168 else:
169 169 l = n.find('.')
170 170 if l == -1:
171 171 l = len(n)
172 172 if ((l == 3 and n[:3] in _winres3) or
173 173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 174 and n[:3] in _winres4)):
175 175 # encode third letter ('aux' -> 'au~78')
176 176 ec = "~%02x" % ord(n[2])
177 177 n = n[0:2] + ec + n[3:]
178 178 path[i] = n
179 179 if n[-1] in '. ':
180 180 # encode last period or space ('foo...' -> 'foo..~2e')
181 181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 182 return path
183 183
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''Non-reversible encoding of path that embeds a hash of it.

    The result is 'dh/', then shortened directory names, then as much
    of the basename as fits into _maxstorepathlen, then the hex digest
    of path and the extension of the basename. The first segment of
    path is dropped. dotencode is passed on to _auxencode.
    '''
    digest = _sha(path).hexdigest()
    segments = lowerencode(path).split('/')[1:]
    parts = _auxencode(segments, dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    used = 0
    for part in parts[:-1]:
        short = part[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        if used:
            # one more character for the separating slash
            total = used + 1 + len(short)
        else:
            total = len(short)
        if total > _maxshortdirslen:
            break
        shortdirs.append(short)
        used = total
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    prefix = 'dh/' + dirs
    suffix = digest + ext
    # fill the space left up to the length limit from the basename
    room = _maxstorepathlen - len(prefix) - len(suffix)
    if room > 0:
        return prefix + basename[:room] + suffix
    return prefix + suffix
218 218
def _hybridencode(path, dotencode):
    '''encodes path, keeping the result within _maxstorepathlen

    Two encodings are involved.

    Default encoding (reversible):

    Directory names are escaped (see encodedir). Uppercase letters 'X'
    become '_x', reserved or illegal characters become '~xx' with xx
    being the two digit hex code of the character (see encodefilename).
    Path components that are Windows reserved filenames are masked by
    encoding their third character ('aux' -> 'au~78', see _auxencode).

    Hashed encoding (not reversible):

    Used instead whenever the default-encoded path is longer than
    _maxstorepathlen characters. See _hashencode for how the result is
    put together: it starts with 'dh/' and carries the sha digest of
    the path, and it keeps the extension (e.g. '.i' or '.d').

    dotencode is handed to _auxencode and to _hashencode.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    return _hashencode(path, dotencode)
256 256
def _pathencode(path):
    '''Default-encode path with dotencode, or return None if too long.

    None is returned when the encoded path would be longer than
    _maxstorepathlen characters.
    '''
    # Quick exit: none of the encoding steps shortens its input, so a
    # path that is already too long cannot fit once it is encoded.
    if len(path) > _maxstorepathlen:
        return None
    segments = _encodefname(encodedir(path)).split('/')
    encoded = '/'.join(_auxencode(segments, True))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    return None

# prefer the pathencode of the parsers module when it provides one
_pathencode = getattr(parsers, 'pathencode', _pathencode)
265 267
def _dothybridencode(f):
    '''Hybrid encoding of f with dotencode enabled.

    Tries _pathencode first; when that gives None, the hashed encoding
    of the directory-escaped path is used.
    '''
    encoded = _pathencode(f)
    if encoded is not None:
        return encoded
    return _hashencode(encodedir(f), True)
271 273
def _plainhybridencode(f):
    '''Hybrid encoding of f with dotencode disabled.'''
    return _hybridencode(f, dotencode=False)
274 276
275 277 def _calcmode(path):
276 278 try:
277 279 # files in .hg/ will be created using this mode
278 280 mode = os.stat(path).st_mode
279 281 # avoid some useless chmods
280 282 if (0777 & ~util.umask) == (0777 & mode):
281 283 mode = None
282 284 except OSError:
283 285 mode = None
284 286 return mode
285 287
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, vfstype):
        createmode = _calcmode(path)
        rawvfs = vfstype(path)
        rawvfs.createmode = createmode
        self.path = path
        self.createmode = createmode
        # names only get their directories escaped on the way in
        self.vfs = scmutil.filtervfs(rawvfs, encodedir)
        self.opener = self.vfs

    def join(self, f):
        '''path of f below the store, with directories escaped'''
        return '/'.join([self.path, encodedir(f)])

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in '.d' or '.i' are listed.
        Subdirectories are visited only if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        if not os.path.isdir(root):
            return []
        # number of leading characters to cut off to get store-relative
        # names
        striplen = len(self.path) + 1
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and (name.endswith('.d')
                                             or name.endswith('.i')):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''entries for everything below 'data', recursively'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''names to copy: 'requires' plus the names listed in _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this kind of store'''
        pass
340 342
class encodedstore(basicstore):
    '''store below <path>/store whose file names go through
    encodefilename'''
    def __init__(self, path, vfstype):
        storepath = path + '/store'
        createmode = _calcmode(storepath)
        rawvfs = vfstype(storepath)
        rawvfs.createmode = createmode
        self.path = storepath
        self.createmode = createmode
        self.vfs = scmutil.filtervfs(rawvfs, encodefilename)
        self.opener = self.vfs

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        unencoded is None for names that cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                # not a valid encoding
                name = None
            yield name, encoded, size

    def join(self, f):
        '''path of f below the store, fully encoded'''
        return '/'.join([self.path, encodefilename(f)])

    def copylist(self):
        '''names to copy, relative to the directory containing the store'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append('store/' + f)
        return names
364 366
class fncache(object):
    '''the set of names recorded in the store's 'fncache' file

    The file is read lazily, on the first add(), membership test or
    iteration. Changes are kept in memory until write() is called.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None until the file has been loaded, a set of names afterwards
        self.entries = None
        # True if entries has additions not yet written to the file
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) counts as empty. Raises
        util.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # read the file again to find the offending line
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file on every exit path, including the abort
            # above and errors raised while reading
            fp.close()

    def _write(self, files, atomictemp):
        '''replace the content of the fncache file with files

        files is an iterable of names; nothing is written to the newly
        opened file if it is empty. atomictemp is passed to the opener.
        '''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''write files to the fncache file and make them the entries'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries to the fncache file if they have changed'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, loading the file first if needed'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
422 424
class _fncachevfs(scmutil.abstractvfs):
    '''vfs that encodes names and records written data files

    Calls are forwarded to the wrapped opener with the path run through
    encode. Paths starting with 'data/' that are opened in a mode other
    than 'r' or 'rb' are added to the fncache first.
    '''
    def __init__(self, op, fnc, encode):
        self.encode = encode
        self.fncache = fnc
        self.opener = op

    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    # mustaudit lives on the wrapped opener
    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
441 443
442 444 class fncachestore(basicstore):
443 445 def __init__(self, path, vfstype, dotencode):
444 446 if dotencode:
445 447 encode = _dothybridencode
446 448 else:
447 449 encode = _plainhybridencode
448 450 self.encode = encode
449 451 self.path = path + '/store'
450 452 self.pathsep = self.path + '/'
451 453 self.createmode = _calcmode(self.path)
452 454 vfs = vfstype(self.path)
453 455 vfs.createmode = self.createmode
454 456 fnc = fncache(vfs)
455 457 self.fncache = fnc
456 458 self.vfs = _fncachevfs(vfs, fnc, encode)
457 459 self.opener = self.vfs
458 460
459 461 def join(self, f):
460 462 return self.pathsep + self.encode(f)
461 463
462 464 def getsize(self, path):
463 465 return os.stat(self.pathsep + path).st_size
464 466
465 467 def datafiles(self):
466 468 rewrite = False
467 469 existing = []
468 470 for f in sorted(self.fncache):
469 471 ef = self.encode(f)
470 472 try:
471 473 yield f, ef, self.getsize(ef)
472 474 existing.append(f)
473 475 except OSError, err:
474 476 if err.errno != errno.ENOENT:
475 477 raise
476 478 # nonexistent entry
477 479 rewrite = True
478 480 if rewrite:
479 481 # rewrite fncache to remove nonexistent entries
480 482 # (may be caused by rollback / strip)
481 483 self.fncache.rewrite(existing)
482 484
483 485 def copylist(self):
484 486 d = ('data dh fncache phaseroots obsstore'
485 487 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
486 488 return (['requires', '00changelog.i'] +
487 489 ['store/' + f for f in d.split()])
488 490
489 491 def write(self):
490 492 self.fncache.write()
491 493
def store(requirements, path, vfstype):
    '''Return the store object matching the given requirements.

    Without 'store' a basicstore is returned, with 'store' but without
    'fncache' an encodedstore, and otherwise a fncachestore, which has
    dotencode enabled if 'dotencode' is in requirements.
    '''
    if 'store' not in requirements:
        return basicstore(path, vfstype)
    if 'fncache' not in requirements:
        return encodedstore(path, vfstype)
    return fncachestore(path, vfstype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now