##// END OF EJS Templates
store: reuse direncoded path in _hybridencode...
Adrian Buehlmann -
r17609:cbc180cf default
parent child Browse files
Show More
@@ -1,477 +1,477 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util, parsers
10 10 import os, stat, errno
11 11
# Module-local alias for util.sha1; _hybridencode uses it to compute the
# digest that is embedded in hashed store paths.
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def _encodedir(path):
17 17 '''
18 18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
# Use parsers.encodedir when the parsers module provides that attribute;
# otherwise fall back to the pure Python _encodedir defined in this module.
encodedir = getattr(parsers, 'encodedir', _encodedir)
33 33
def decodedir(path):
    '''Undo the directory escaping done by _encodedir.

    Strips the extra '.hg' from every '.d.hg/', '.i.hg/' and '.hg.hg/'
    in path. Paths that contain no '.hg/' at all are returned unchanged
    without attempting any replacement.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # '.hg.hg/' is handled last, mirroring the order used for encoding
        unescapes = (
            (".d.hg/", ".d/"),
            (".i.hg/", ".i/"),
            (".hg.hg/", ".hg/"),
        )
        for escaped, plain in unescapes:
            path = path.replace(escaped, plain)
    return path
49 49
50 50 def _buildencodefun():
51 51 '''
52 52 >>> enc, dec = _buildencodefun()
53 53
54 54 >>> enc('nothing/special.txt')
55 55 'nothing/special.txt'
56 56 >>> dec('nothing/special.txt')
57 57 'nothing/special.txt'
58 58
59 59 >>> enc('HELLO')
60 60 '_h_e_l_l_o'
61 61 >>> dec('_h_e_l_l_o')
62 62 'HELLO'
63 63
64 64 >>> enc('hello:world?')
65 65 'hello~3aworld~3f'
66 66 >>> dec('hello~3aworld~3f')
67 67 'hello:world?'
68 68
69 69 >>> enc('the\x07quick\xADshot')
70 70 'the~07quick~adshot'
71 71 >>> dec('the~07quick~adshot')
72 72 'the\\x07quick\\xadshot'
73 73 '''
74 74 e = '_'
75 75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 77 for x in (range(32) + range(126, 256) + winreserved):
78 78 cmap[chr(x)] = "~%02x" % x
79 79 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
80 80 cmap[chr(x)] = e + chr(x).lower()
81 81 dmap = {}
82 82 for k, v in cmap.iteritems():
83 83 dmap[v] = k
84 84 def decode(s):
85 85 i = 0
86 86 while i < len(s):
87 87 for l in xrange(1, 4):
88 88 try:
89 89 yield dmap[s[i:i + l]]
90 90 i += l
91 91 break
92 92 except KeyError:
93 93 pass
94 94 else:
95 95 raise KeyError
96 96 return (lambda s: ''.join([cmap[c] for c in s]),
97 97 lambda s: ''.join(list(decode(s))))
98 98
# Build the encoder/decoder pair once at import time; encodefilename and
# decodefilename below are thin wrappers around these two functions.
_encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''Return the store encoding of s.

    The directory escaping (encodedir) is applied first, then the
    character level encoding (_encodefname).

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direncoded = encodedir(s)
    return _encodefname(direncoded)
107 107
def decodefilename(s):
    '''Inverse of encodefilename.

    The character level encoding is undone first (_decodefname), then the
    directory escaping (decodedir). A KeyError raised by _decodefname is
    not caught here.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
114 114
115 115 def _buildlowerencodefun():
116 116 '''
117 117 >>> f = _buildlowerencodefun()
118 118 >>> f('nothing/special.txt')
119 119 'nothing/special.txt'
120 120 >>> f('HELLO')
121 121 'hello'
122 122 >>> f('hello:world?')
123 123 'hello~3aworld~3f'
124 124 >>> f('the\x07quick\xADshot')
125 125 'the~07quick~adshot'
126 126 '''
127 127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 129 for x in (range(32) + range(126, 256) + winreserved):
130 130 cmap[chr(x)] = "~%02x" % x
131 131 for x in range(ord("A"), ord("Z")+1):
132 132 cmap[chr(x)] = chr(x).lower()
133 133 return lambda s: "".join([cmap[c] for c in s])
134 134
# Built once at import time; _hybridencode uses it for the hashed encoding.
lowerencode = _buildlowerencodefun()
136 136
137 137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 140 def _auxencode(path, dotencode):
141 141 '''
142 142 Encodes filenames containing names reserved by Windows or which end in
143 143 period or space. Does not touch other single reserved characters c.
144 144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 145 Additionally encodes space or period at the beginning, if dotencode is
146 146 True. Parameter path is assumed to be all lowercase.
147 147 A segment only needs encoding if a reserved name appears as a
148 148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 149 doesn't need encoding.
150 150
151 151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 152 >>> _auxencode(s.split('/'), True)
153 153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 155 >>> _auxencode(s.split('/'), False)
156 156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 157 >>> _auxencode(['foo. '], True)
158 158 ['foo.~20']
159 159 >>> _auxencode([' .foo'], True)
160 160 ['~20.foo']
161 161 '''
162 162 for i, n in enumerate(path):
163 163 if not n:
164 164 continue
165 165 if dotencode and n[0] in '. ':
166 166 n = "~%02x" % ord(n[0]) + n[1:]
167 167 path[i] = n
168 168 else:
169 169 l = n.find('.')
170 170 if l == -1:
171 171 l = len(n)
172 172 if ((l == 3 and n[:3] in _winres3) or
173 173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 174 and n[:3] in _winres4)):
175 175 # encode third letter ('aux' -> 'au~78')
176 176 ec = "~%02x" % ord(n[2])
177 177 n = n[0:2] + ec + n[3:]
178 178 path[i] = n
179 179 if n[-1] in '. ':
180 180 # encode last period or space ('foo...' -> 'foo..~2e')
181 181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 182 return path
183 183
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.

    path is the unencoded store path; dotencode is forwarded to _auxencode
    and selects whether a leading period or space is encoded as well.
    '''
    # dir-encode exactly once; the result feeds both the default encoding
    # and (if needed) the digest and lowerencoding of the hashed encoding
    path = encodedir(path)
    ef = _encodefname(path).split('/')
    res = '/'.join(_auxencode(ef, dotencode))
    if len(res) > _maxstorepathlen:
        digest = _sha(path).hexdigest()
        # drop the first path component ('data'); 'dh/' replaces it below
        le = lowerencode(path).split('/')[1:]
        parts = _auxencode(le, dotencode)
        basename = parts[-1]
        _root, ext = os.path.splitext(basename)
        sdirs = []
        sdirslen = 0
        for p in parts[:-1]:
            d = p[:_dirprefixlen]
            if d[-1] in '. ':
                # Windows can't access dirs ending in period or space
                d = d[:-1] + '_'
            if sdirslen == 0:
                t = len(d)
            else:
                # account for the separator between directory levels
                t = sdirslen + 1 + len(d)
                if t > _maxshortdirslen:
                    break
            sdirs.append(d)
            sdirslen = t
        dirs = '/'.join(sdirs)
        if len(dirs) > 0:
            dirs += '/'
        res = 'dh/' + dirs + digest + ext
        # use whatever room is left for the beginning of the basename
        spaceleft = _maxstorepathlen - len(res)
        if spaceleft > 0:
            filler = basename[:spaceleft]
            res = 'dh/' + dirs + filler + digest + ext
    return res
251 251
252 252 def _calcmode(path):
253 253 try:
254 254 # files in .hg/ will be created using this mode
255 255 mode = os.stat(path).st_mode
256 256 # avoid some useless chmods
257 257 if (0777 & ~util.umask) == (0777 & mode):
258 258 mode = None
259 259 except OSError:
260 260 mode = None
261 261 return mode
262 262
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        '''path is the store directory; openertype is called with that
        directory to create the underlying opener'''
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # file names handed to the opener are dir-encoded on the fly
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the dir-encoded location of f below the store path'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed. Subdirectories are visited only if recurse is true.'''
        top = self.path
        if relpath:
            top += '/' + relpath
        # reported names are relative to the store path
        prefixlen = len(self.path) + 1
        found = []
        if os.path.isdir(top):
            pending = [top]
            while pending:
                current = pending.pop()
                for name, kind, st in osutil.listdir(current, stat=True):
                    full = current + '/' + name
                    suffix = name[-2:]
                    if kind == stat.S_IFREG and suffix in ('.d', '.i'):
                        encoded = util.pconvert(full[prefixlen:])
                        found.append((decodedir(encoded), encoded,
                                      st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below 'data', walked recursively'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return 'requires' followed by the names listed in _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
316 316
class encodedstore(basicstore):
    '''store below <path>/store using encodefilename for file names'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        # file names handed to the opener are fully encoded on the fly
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        unencoded is None for files whose name cannot be decoded.'''
        for unencoded, encoded, size in self._walk('data', True):
            try:
                unencoded = decodefilename(unencoded)
            except KeyError:
                # not a valid encoding
                unencoded = None
            yield unencoded, encoded, size

    def join(self, f):
        '''return the encoded location of f below the store path'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return 'requires', '00changelog.i' and the names listed in
        _data, the latter prefixed with 'store/' '''
        toplevel = ['requires', '00changelog.i']
        instore = ['store/' + name for name in _data.split()]
        return toplevel + instore
339 339
class fncache(object):
    '''in-memory set of the names stored in the 'fncache' file

    The file is read lazily on first use (add, membership test or
    iteration) and holds one name per line.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None means "not loaded yet"
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        Raises util.Abort if the file contains an empty line.'''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # rescan the file to report the number of the bad line
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file on the error paths as well
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one name per line, to the fncache file and clear
        the dirty flag'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file contents and the loaded entries by files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back to the file if they have been modified'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
397 397
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that registers written data files in an fncache

    op is the wrapped opener, fnc the fncache object and encode the
    function used to translate paths before they reach op.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    # mustaudit is read from and written to the wrapped opener
    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        '''open path through the wrapped opener

        Paths starting with 'data/' that are opened in any mode other
        than 'r' or 'rb' are added (unencoded) to the fncache first.'''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
416 416
def _plainhybridencode(f):
    '''_hybridencode with dotencode switched off'''
    return _hybridencode(f, dotencode=False)
419 419
def _dothybridencode(f):
    '''_hybridencode with dotencode switched on'''
    return _hybridencode(f, dotencode=True)
422 422
class fncachestore(basicstore):
    '''store below <path>/store using the hybrid encoding and an fncache

    dotencode selects which of the two hybrid encoders is used for file
    names.
    '''
    def __init__(self, path, openertype, dotencode):
        if dotencode:
            encode = _dothybridencode
        else:
            encode = _plainhybridencode
        self.encode = encode
        self.path = path + '/store'
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(self.path)
        op = openertype(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc
        # the wrapped opener encodes names and feeds the fncache
        self.opener = _fncacheopener(op, fnc, encode)

    def join(self, f):
        '''return the encoded location of f below the store path'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the (already encoded) path in the store'''
        return os.stat(self.pathsep + path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for every fncache entry

        Entries whose file does not exist are skipped. If there were any,
        the fncache is rewritten without them once all entries have been
        visited.'''
        rewrite = False
        existing = []
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                # only the stat call is expected to fail here
                size = self.getsize(ef)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                # nonexistent entry
                rewrite = True
            else:
                yield f, ef, size
                existing.append(f)
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''return the top level names plus the store names prefixed with
        'store/' '''
        d = ('data dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self):
        '''write the fncache (a no-op unless it has been modified)'''
        self.fncache.write()
471 471
def store(requirements, path, openertype):
    '''Return the store object matching the given requirements.

    Without 'store' a basicstore is returned; with 'store' but without
    'fncache' an encodedstore; otherwise an fncachestore, whose dotencode
    flag is set if 'dotencode' is among the requirements.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)
    return fncachestore(path, openertype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now