##// END OF EJS Templates
store: explain "aux.foo" versus "foo.aux" in doc of _auxencode()
Adrian Buehlmann -
r17569:e9af2134 default
parent child Browse files
Show More
@@ -1,441 +1,443
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
# Escaping directory names avoids a collision between a file named foo
# and a directory named foo.i or foo.d
def encodedir(path):
    '''
    Escape directory components ending in .hg, .i or .d by appending ".hg".

    Only paths starting with 'data/' are touched; anything else is
    returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if path.startswith('data/'):
        # ".hg/" has to be handled first: otherwise the ".hg" suffixes
        # added for ".i/" and ".d/" would be escaped a second time
        for suffix in (".hg/", ".i/", ".d/"):
            path = path.replace(suffix, suffix[:-1] + ".hg/")
    return path
31 31
def decodedir(path):
    '''
    Undo encodedir(): strip the ".hg" escape from directory components.

    Paths that do not start with 'data/' or that contain no ".hg/" are
    returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if path.startswith('data/') and ".hg/" in path:
        # reverse order of encodedir(): ".hg.hg/" is unescaped last
        unescape = ((".d.hg/", ".d/"),
                    (".i.hg/", ".i/"),
                    (".hg.hg/", ".hg/"))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
47 47
def _buildencodefun():
    '''
    Build and return the reversible (encode, decode) filename functions.

    Uppercase letters and '_' are escaped as '_' plus the lowercase
    letter; control characters, characters >= 126 and Windows reserved
    characters become '~xx' (two digit hex code). The encoder also runs
    encodedir() on its input, the decoder runs decodedir() on its output.
    The decoder raises KeyError on input that is not a valid encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']
    # start from the identity mapping, then override what needs escaping
    cmap = dict([(chr(code), chr(code)) for code in range(127)])
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()
    # inverse table: encoded token (1 to 3 characters) -> original character
    dmap = dict([(token, char) for char, token in cmap.items()])

    def decode(s):
        pos = 0
        while pos < len(s):
            # try the shortest token first: plain char, '_x', then '~xx'
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encodename(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decodename(s):
        return decodedir("".join(list(decode(s))))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 121 _winreservednames = '''con prn aux nul
122 122 com1 com2 com3 com4 com5 com6 com7 com8 com9
123 123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 True.
131 path is assumed to be all lowercase.
130 True. Parameter path is assumed to be all lowercase.
131 A segment only needs encoding if a reserved name appears as a
132 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
133 doesn't need encoding.
132 134
133 135 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
134 136 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
135 137 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
136 138 '.com1com2/lp~749.lpt4.lpt1/conprn/com0/lpt0/foo~2e'
137 139 >>> _auxencode('foo. ', True)
138 140 'foo.~20'
139 141 >>> _auxencode(' .foo', True)
140 142 '~20.foo'
141 143 '''
142 144 res = []
143 145 for n in path.split('/'):
144 146 if n:
145 147 base = n.split('.')[0]
146 148 if base and (base in _winreservednames):
147 149 # encode third letter ('aux' -> 'au~78')
148 150 ec = "~%02x" % ord(n[2])
149 151 n = n[0:2] + ec + n[3:]
150 152 if n[-1] in '. ':
151 153 # encode last period or space ('foo...' -> 'foo..~2e')
152 154 n = n[:-1] + "~%02x" % ord(n[-1])
153 155 if dotencode and n[0] in '. ':
154 156 n = "~%02x" % ord(n[0]) + n[1:]
155 157 res.append(n)
156 158 return '/'.join(res)
157 159
158 160 _maxstorepathlen = 120
159 161 _dirprefixlen = 8
160 162 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
161 163 def _hybridencode(path, auxencode):
162 164 '''encodes path with a length limit
163 165
164 166 Encodes all paths that begin with 'data/', according to the following.
165 167
166 168 Default encoding (reversible):
167 169
168 170 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
169 171 characters are encoded as '~xx', where xx is the two digit hex code
170 172 of the character (see encodefilename).
171 173 Relevant path components consisting of Windows reserved filenames are
172 174 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
173 175
174 176 Hashed encoding (not reversible):
175 177
176 178 If the default-encoded path is longer than _maxstorepathlen, a
177 179 non-reversible hybrid hashing of the path is done instead.
178 180 This encoding uses up to _dirprefixlen characters of all directory
179 181 levels of the lowerencoded path, but not more levels than can fit into
180 182 _maxshortdirslen.
181 183 Then follows the filler followed by the sha digest of the full path.
182 184 The filler is the beginning of the basename of the lowerencoded path
183 185 (the basename is everything after the last path separator). The filler
184 186 is as long as possible, filling in characters from the basename until
185 187 the encoded path has _maxstorepathlen characters (or all chars of the
186 188 basename have been taken).
187 189 The extension (e.g. '.i' or '.d') is preserved.
188 190
189 191 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
190 192 encoding was used.
191 193 '''
192 194 if not path.startswith('data/'):
193 195 return path
194 196 # escape directories ending with .i and .d
195 197 path = encodedir(path)
196 198 ndpath = path[len('data/'):]
197 199 res = 'data/' + auxencode(encodefilename(ndpath))
198 200 if len(res) > _maxstorepathlen:
199 201 digest = _sha(path).hexdigest()
200 202 aep = auxencode(lowerencode(ndpath))
201 203 _root, ext = os.path.splitext(aep)
202 204 parts = aep.split('/')
203 205 basename = parts[-1]
204 206 sdirs = []
205 207 for p in parts[:-1]:
206 208 d = p[:_dirprefixlen]
207 209 if d[-1] in '. ':
208 210 # Windows can't access dirs ending in period or space
209 211 d = d[:-1] + '_'
210 212 t = '/'.join(sdirs) + '/' + d
211 213 if len(t) > _maxshortdirslen:
212 214 break
213 215 sdirs.append(d)
214 216 dirs = '/'.join(sdirs)
215 217 if len(dirs) > 0:
216 218 dirs += '/'
217 219 res = 'dh/' + dirs + digest + ext
218 220 spaceleft = _maxstorepathlen - len(res)
219 221 if spaceleft > 0:
220 222 filler = basename[:spaceleft]
221 223 res = 'dh/' + dirs + filler + digest + ext
222 224 return res
223 225
224 226 def _calcmode(path):
225 227 try:
226 228 # files in .hg/ will be created using this mode
227 229 mode = os.stat(path).st_mode
228 230 # avoid some useless chmods
229 231 if (0777 & ~util.umask) == (0777 & mode):
230 232 mode = None
231 233 except OSError:
232 234 mode = None
233 235 return mode
234 236
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # every name passed to the opener goes through encodedir first
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''returns the path of store file f below self.path'''
        return '/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in .d or .i below self.path/relpath are
        listed; subdirectories are only visited if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        # number of leading characters to drop to get a store-relative name
        striplen = len(self.path) + 1
        found = []
        if os.path.isdir(root):
            pending = [root]
            while pending:
                current = pending.pop()
                for name, kind, st in osutil.listdir(current, stat=True):
                    full = current + '/' + name
                    if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                        encoded = util.pconvert(full[striplen:])
                        found.append((decodedir(encoded), encoded,
                                      st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''returns the names listed in _data, preceded by 'requires' '''
        return ['requires'] + _data.split()

    def write(self):
        '''nothing to write for this store type'''
        pass
288 290
class encodedstore(basicstore):
    '''store below <path>/store whose names go through encodefilename'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None if the name found on disk cannot be decoded.
        '''
        for unencoded, encoded, size in self._walk('data', True):
            try:
                unencoded = decodefilename(unencoded)
            except KeyError:
                # decodefilename rejected the name: not a valid encoding
                unencoded = None
            yield unencoded, encoded, size

    def join(self, f):
        '''returns the path of store file f below self.path'''
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        '''returns 'requires', '00changelog.i' and the store/ entries'''
        storefiles = ['store/' + name for name in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
311 313
class fncache(object):
    '''set of store file names, persisted in the 'fncache' file

    The entries are loaded lazily on first use and only written back by
    write() if something was added since the last load or write.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None means "not loaded yet"
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        Raises util.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # re-read the file to report the number of the bad line
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # also release the file handle when aborting on a bad entry
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one per line, to the fncache file'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        # NOTE(review): deliberately no try/finally here - for an
        # atomictemp file, close() presumably publishes the temporary
        # file, which must not happen after a failed write; confirm
        # against the opener implementation.
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file content and the loaded entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back if they changed'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, loading them first if needed'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
369 371
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that records written data/ files in the fncache'''
    def __init__(self, op, fnc, encode):
        self.opener, self.fncache, self.encode = op, fnc, encode

    # mustaudit is forwarded to the wrapped opener in both directions

    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        '''open the encoded path, registering it first if it may be written'''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # the fncache stores the unencoded name
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
388 390
class fncachestore(basicstore):
    '''store below <path>/store that lists its data files in an fncache'''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        storepath = path + '/store'
        self.path = storepath
        self.pathsep = storepath + '/'
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        cache = fncache(rawopener)
        self.fncache = cache
        self.opener = _fncacheopener(rawopener, cache, encode)

    def join(self, f):
        '''returns the path of the encoded store file f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''returns the size of the already encoded store file path'''
        return os.stat(self.pathsep + path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for every fncache entry

        Entries whose file no longer exists are skipped; if there were
        any, the fncache is rewritten without them once iteration ends.
        '''
        stale = False
        live = []
        for name in sorted(self.fncache):
            encoded = self.encode(name)
            try:
                yield name, encoded, self.getsize(encoded)
                live.append(name)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                # nonexistent entry
                stale = True
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(live)

    def copylist(self):
        '''returns 'requires', '00changelog.i' and the store/ entries'''
        names = ('data dh fncache phaseroots obsstore'
                 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + name for name in names.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self):
        '''write the fncache back if it changed'''
        self.fncache.write()
433 435
def store(requirements, path, openertype):
    '''Return the store object matching the repository requirements.

    Without 'store' a basicstore is used, with 'store' alone an
    encodedstore, and with 'store' plus 'fncache' a fncachestore whose
    encoder honours the 'dotencode' requirement.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # requirements is consulted on every call, not captured up front
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now