##// END OF EJS Templates
store: keep an accumulated length for the shorted dirs in _hybridencode...
Adrian Buehlmann -
r17588:3d789dd7 default
parent child Browse files
Show More
@@ -1,443 +1,448 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
# SHA-1 constructor taken from util; _hybridencode() calls it and uses the
# hexdigest() of the result for paths that need the hashed encoding
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''
    Escape directory names ending in '.hg', '.i' or '.d'.

    Every '.hg/', '.i/' or '.d/' found in path gets an additional '.hg'
    inserted in front of the slash. The argument is returned unchanged
    if none of these sequences occurs.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    # '.hg/' must be processed first: the two later replacements create
    # new '.hg/' sequences that must not be escaped a second time
    for marker in ('.hg/', '.i/', '.d/'):
        path = path.replace(marker, marker[:-1] + '.hg/')
    return path
29 29
def decodedir(path):
    '''
    Reverse the directory escaping applied by encodedir().

    Paths without any '.hg/' sequence are returned as they are.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # '.hg.hg/' comes last so that the '.hg/' it produces is never
        # taken for an escape marker by the other two replacements
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
45 45
def _buildencodefun():
    '''
    Build the reversible filename encoder and the matching decoder.

    Returns a pair (encode, decode). encode() first applies encodedir()
    and then maps every character: uppercase letters and '_' become '_'
    followed by the lowercase letter, characters below 32, from 126
    upwards and the Windows reserved characters become '~xx' (two hex
    digits), everything else is kept. decode() reverses that mapping,
    applies decodedir() and raises KeyError on input it cannot decode.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']

    # start with the identity mapping, then override the special cases
    cmap = dict([(chr(code), chr(code)) for code in range(127)])
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()

    # inverse table, keyed by the encoded form (1 to 3 characters long)
    dmap = dict([(encoded, plain) for plain, encoded in cmap.items()])

    def decode(s):
        pos = 0
        end = len(s)
        while pos < end:
            # try the shortest encoded form first
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encodefn(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decodefn(s):
        return decodedir("".join(decode(s)))

    return encodefn, decodefn

encodefilename, decodefilename = _buildencodefun()
96 96
97 97 def _buildlowerencodefun():
98 98 '''
99 99 >>> f = _buildlowerencodefun()
100 100 >>> f('nothing/special.txt')
101 101 'nothing/special.txt'
102 102 >>> f('HELLO')
103 103 'hello'
104 104 >>> f('hello:world?')
105 105 'hello~3aworld~3f'
106 106 >>> f('the\x07quick\xADshot')
107 107 'the~07quick~adshot'
108 108 '''
109 109 winreserved = [ord(x) for x in '\\:*?"<>|']
110 110 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
111 111 for x in (range(32) + range(126, 256) + winreserved):
112 112 cmap[chr(x)] = "~%02x" % x
113 113 for x in range(ord("A"), ord("Z")+1):
114 114 cmap[chr(x)] = chr(x).lower()
115 115 return lambda s: "".join([cmap[c] for c in s])
116 116
117 117 lowerencode = _buildlowerencodefun()
118 118
119 119 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
120 120 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
121 121 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
122 122 def _auxencode(path, dotencode):
123 123 '''
124 124 Encodes filenames containing names reserved by Windows or which end in
125 125 period or space. Does not touch other single reserved characters c.
126 126 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
127 127 Additionally encodes space or period at the beginning, if dotencode is
128 128 True. Parameter path is assumed to be all lowercase.
129 129 A segment only needs encoding if a reserved name appears as a
130 130 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
131 131 doesn't need encoding.
132 132
133 133 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
134 134 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
135 135 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
136 136 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
137 137 >>> _auxencode('foo. ', True)
138 138 ['foo.~20']
139 139 >>> _auxencode(' .foo', True)
140 140 ['~20.foo']
141 141 '''
142 142 res = path.split('/')
143 143 for i, n in enumerate(res):
144 144 if not n:
145 145 continue
146 146 if dotencode and n[0] in '. ':
147 147 n = "~%02x" % ord(n[0]) + n[1:]
148 148 res[i] = n
149 149 else:
150 150 l = n.find('.')
151 151 if l == -1:
152 152 l = len(n)
153 153 if ((l == 3 and n[:3] in _winres3) or
154 154 (l == 4 and n[3] <= '9' and n[3] >= '1'
155 155 and n[:3] in _winres4)):
156 156 # encode third letter ('aux' -> 'au~78')
157 157 ec = "~%02x" % ord(n[2])
158 158 n = n[0:2] + ec + n[3:]
159 159 res[i] = n
160 160 if n[-1] in '. ':
161 161 # encode last period or space ('foo...' -> 'foo..~2e')
162 162 res[i] = n[:-1] + "~%02x" % ord(n[-1])
163 163 return res
164 164
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, auxencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.

    path is the unencoded store path. auxencode is a callable that takes
    an encoded path string and returns the list of its path segments.
    Returns the encoded path as a string.
    '''
    res = '/'.join(auxencode(encodefilename(path)))
    if len(res) <= _maxstorepathlen:
        return res

    path = encodedir(path)
    digest = _sha(path).hexdigest()
    # the first segment is dropped; 'dh/' is prepended to the result instead
    parts = auxencode(lowerencode(path))[1:]
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)
    sdirs = []
    # always equal to len('/'.join(sdirs)); kept as a running total so the
    # joined string does not have to be rebuilt for every directory level
    sdirslen = 0
    for p in parts[:-1]:
        d = p[:_dirprefixlen]
        if d[-1] in '. ':
            # Windows can't access dirs ending in period or space
            d = d[:-1] + '_'
        if sdirslen == 0:
            # a single shortened dir always fits into _maxshortdirslen
            t = len(d)
        else:
            # + 1 accounts for the separating '/'
            t = sdirslen + 1 + len(d)
            if t > _maxshortdirslen:
                break
        sdirs.append(d)
        sdirslen = t
    dirs = '/'.join(sdirs)
    if dirs:
        dirs += '/'
    res = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(res)
    if spaceleft > 0:
        filler = basename[:spaceleft]
        res = 'dh/' + dirs + filler + digest + ext
    return res
225 230
226 231 def _calcmode(path):
227 232 try:
228 233 # files in .hg/ will be created using this mode
229 234 mode = os.stat(path).st_mode
230 235 # avoid some useless chmods
231 236 if (0777 & ~util.umask) == (0777 & mode):
232 237 mode = None
233 238 except OSError:
234 239 mode = None
235 240 return mode
236 241
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        op = openertype(self.path)
        op.createmode = self.createmode
        # every name handed to the opener goes through encodedir first
        self.opener = scmutil.filteropener(op, encodedir)

    def join(self, f):
        '''return the path of f below the store, with encodedir applied'''
        return '/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are visited only if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        found = []
        if not os.path.isdir(root):
            return found
        # strip the store path and the following '/' from reported names
        prefixlen = len(self.path) + 1
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG:
                    if name[-2:] in ('.d', '.i'):
                        enc = util.pconvert(full[prefixlen:])
                        found.append((decodedir(enc), enc, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below 'data', recursively'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return 'requires' followed by the names listed in _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
290 295
class encodedstore(basicstore):
    '''store below <path>/store whose names go through encodefilename'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        op = openertype(storepath)
        op.createmode = self.createmode
        self.opener = scmutil.filteropener(op, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        unencoded is None for names that decodefilename rejects.
        '''
        for unencoded, encoded, size in self._walk('data', True):
            try:
                unencoded = decodefilename(unencoded)
            except KeyError:
                unencoded = None
            yield unencoded, encoded, size

    def join(self, f):
        '''return the path of f below the store, with encodefilename applied'''
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        '''return the top-level names plus the _data names below 'store/' '''
        storefiles = ['store/' + name for name in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
313 318
class fncache(object):
    '''lazily loaded set of the names recorded in the 'fncache' file

    opener is called with the name 'fncache' to read and write that file.
    Nothing is read until the first add(), membership test or iteration.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None means "not loaded yet"
        self.entries = None
        # True when entries holds additions not yet written out
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) yields an empty set.
        Raises util.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # reread the file to report the offending line number
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file on the error paths as well
            fp.close()

    def _write(self, files, atomictemp):
        '''write files to the fncache file, one encodedir'ed name per line'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file content and the entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if they changed since loading'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''record fn, loading the file first if necessary'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
371 376
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that records written 'data/' paths in an fncache

    op is the wrapped opener, fnc the fncache object and encode the
    function applied to every path before it is passed on to op.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped opener in both directions
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # remember the unencoded name of every data file opened
            # in a mode other than plain reading
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
390 395
391 396 class fncachestore(basicstore):
392 397 def __init__(self, path, openertype, encode):
393 398 self.encode = encode
394 399 self.path = path + '/store'
395 400 self.pathsep = self.path + '/'
396 401 self.createmode = _calcmode(self.path)
397 402 op = openertype(self.path)
398 403 op.createmode = self.createmode
399 404 fnc = fncache(op)
400 405 self.fncache = fnc
401 406 self.opener = _fncacheopener(op, fnc, encode)
402 407
403 408 def join(self, f):
404 409 return self.pathsep + self.encode(f)
405 410
406 411 def getsize(self, path):
407 412 return os.stat(self.pathsep + path).st_size
408 413
409 414 def datafiles(self):
410 415 rewrite = False
411 416 existing = []
412 417 for f in sorted(self.fncache):
413 418 ef = self.encode(f)
414 419 try:
415 420 yield f, ef, self.getsize(ef)
416 421 existing.append(f)
417 422 except OSError, err:
418 423 if err.errno != errno.ENOENT:
419 424 raise
420 425 # nonexistent entry
421 426 rewrite = True
422 427 if rewrite:
423 428 # rewrite fncache to remove nonexistent entries
424 429 # (may be caused by rollback / strip)
425 430 self.fncache.rewrite(existing)
426 431
427 432 def copylist(self):
428 433 d = ('data dh fncache phaseroots obsstore'
429 434 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
430 435 return (['requires', '00changelog.i'] +
431 436 ['store/' + f for f in d.split()])
432 437
433 438 def write(self):
434 439 self.fncache.write()
435 440
def store(requirements, path, openertype):
    '''
    Create the store object matching the given requirements.

    Without 'store' a basicstore is returned, with 'store' but without
    'fncache' an encodedstore, otherwise a fncachestore whose encoder is
    _hybridencode combined with _auxencode.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # 'dotencode' is looked up in requirements on every call
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now