##// END OF EJS Templates
store: let _auxencode() return the list of path segments...
Adrian Buehlmann -
r17574:81a033bb default
parent child Browse files
Show More
@@ -1,450 +1,449 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
# A file named foo and a directory named foo.i or foo.d would otherwise
# collide in the store, so such directory names get an extra '.hg' suffix.
def encodedir(path):
    '''
    Escape directory names ending in .hg, .i or .d inside a 'data/' path.

    Paths that do not start with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if path.startswith('data/'):
        # '.hg/' must be escaped first so that decodedir can undo the
        # '.i.hg/' and '.d.hg/' escapes unambiguously
        escaped = path.replace(".hg/", ".hg.hg/")
        escaped = escaped.replace(".i/", ".i.hg/")
        return escaped.replace(".d/", ".d.hg/")
    return path
31 31
def decodedir(path):
    '''
    Undo the directory escaping applied by encodedir.

    Paths that do not start with 'data/' or contain no '.hg/' are
    returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" not in path or not path.startswith('data/'):
        return path
    # replacements are applied in the reverse order of encodedir
    unescapes = (
        (".d.hg/", ".d/"),
        (".i.hg/", ".i/"),
        (".hg.hg/", ".hg/"),
    )
    decoded = path
    for escaped, plain in unescapes:
        decoded = decoded.replace(escaped, plain)
    return decoded
47 47
def _buildencodefun():
    '''
    Build the reversible filename (encode, decode) function pair.

    Uppercase letters and the underscore become '_' plus the lowercase
    character; control characters, characters from 126 upwards and the
    Windows reserved characters become '~xx' (two hex digits).

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']
    # start from the identity mapping and override what must be escaped
    cmap = dict((chr(code), chr(code)) for code in range(127))
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()
    # inverse table: encoded token -> original character
    dmap = dict((token, char) for char, token in cmap.items())

    def decode(s):
        pos = 0
        while pos < len(s):
            # a token is 1 ('a'), 2 ('_a') or 3 ('~3a') characters long
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # no token of any width matches at this position
                raise KeyError

    def encodename(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decodename(s):
        return decodedir("".join(list(decode(s))))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 121 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
122 122 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
123 123 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 130 True. Parameter path is assumed to be all lowercase.
131 131 A segment only needs encoding if a reserved name appears as a
132 132 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
133 133 doesn't need encoding.
134 134
135 135 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
136 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
136 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
137 137 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
138 '.com1com2/lp~749.lpt4.lpt1/conprn/com0/lpt0/foo~2e'
138 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
139 139 >>> _auxencode('foo. ', True)
140 'foo.~20'
140 ['foo.~20']
141 141 >>> _auxencode(' .foo', True)
142 '~20.foo'
142 ['~20.foo']
143 143 '''
144 144 res = path.split('/')
145 145 for i, n in enumerate(res):
146 146 if not n:
147 147 continue
148 148 if dotencode and n[0] in '. ':
149 149 n = "~%02x" % ord(n[0]) + n[1:]
150 150 res[i] = n
151 151 else:
152 152 l = n.find('.')
153 153 if l == -1:
154 154 l = len(n)
155 155 if ((l == 3 and n[:3] in _winres3) or
156 156 (l == 4 and n[3] <= '9' and n[3] >= '1'
157 157 and n[:3] in _winres4)):
158 158 # encode third letter ('aux' -> 'au~78')
159 159 ec = "~%02x" % ord(n[2])
160 160 n = n[0:2] + ec + n[3:]
161 161 res[i] = n
162 162 if n[-1] in '. ':
163 163 # encode last period or space ('foo...' -> 'foo..~2e')
164 164 res[i] = n[:-1] + "~%02x" % ord(n[-1])
165 return '/'.join(res)
165 return res
166 166
167 167 _maxstorepathlen = 120
168 168 _dirprefixlen = 8
169 169 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
170 170 def _hybridencode(path, auxencode):
171 171 '''encodes path with a length limit
172 172
173 173 Encodes all paths that begin with 'data/', according to the following.
174 174
175 175 Default encoding (reversible):
176 176
177 177 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
178 178 characters are encoded as '~xx', where xx is the two digit hex code
179 179 of the character (see encodefilename).
180 180 Relevant path components consisting of Windows reserved filenames are
181 181 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
182 182
183 183 Hashed encoding (not reversible):
184 184
185 185 If the default-encoded path is longer than _maxstorepathlen, a
186 186 non-reversible hybrid hashing of the path is done instead.
187 187 This encoding uses up to _dirprefixlen characters of all directory
188 188 levels of the lowerencoded path, but not more levels than can fit into
189 189 _maxshortdirslen.
190 190 Then follows the filler followed by the sha digest of the full path.
191 191 The filler is the beginning of the basename of the lowerencoded path
192 192 (the basename is everything after the last path separator). The filler
193 193 is as long as possible, filling in characters from the basename until
194 194 the encoded path has _maxstorepathlen characters (or all chars of the
195 195 basename have been taken).
196 196 The extension (e.g. '.i' or '.d') is preserved.
197 197
198 198 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
199 199 encoding was used.
200 200 '''
201 201 if not path.startswith('data/'):
202 202 return path
203 203 # escape directories ending with .i and .d
204 204 path = encodedir(path)
205 205 ndpath = path[len('data/'):]
206 res = 'data/' + auxencode(encodefilename(ndpath))
206 res = 'data/' + '/'.join(auxencode(encodefilename(ndpath)))
207 207 if len(res) > _maxstorepathlen:
208 208 digest = _sha(path).hexdigest()
209 aep = auxencode(lowerencode(ndpath))
210 _root, ext = os.path.splitext(aep)
211 parts = aep.split('/')
209 parts = auxencode(lowerencode(ndpath))
210 _root, ext = os.path.splitext(parts[-1])
212 211 basename = parts[-1]
213 212 sdirs = []
214 213 for p in parts[:-1]:
215 214 d = p[:_dirprefixlen]
216 215 if d[-1] in '. ':
217 216 # Windows can't access dirs ending in period or space
218 217 d = d[:-1] + '_'
219 218 t = '/'.join(sdirs) + '/' + d
220 219 if len(t) > _maxshortdirslen:
221 220 break
222 221 sdirs.append(d)
223 222 dirs = '/'.join(sdirs)
224 223 if len(dirs) > 0:
225 224 dirs += '/'
226 225 res = 'dh/' + dirs + digest + ext
227 226 spaceleft = _maxstorepathlen - len(res)
228 227 if spaceleft > 0:
229 228 filler = basename[:spaceleft]
230 229 res = 'dh/' + dirs + filler + digest + ext
231 230 return res
232 231
def _calcmode(path):
    '''
    Return the mode of path, which files in .hg/ will be created with.

    Returns None if path cannot be stat'ed, or if its permission bits
    already equal what the umask yields (avoids some useless chmods).
    '''
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO # 0777
    try:
        mode = os.stat(path).st_mode
    except OSError:
        return None
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
243 242
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''returns the path of f below the store, with directories escaped'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files whose name ends in .d or .i are listed.
        Subdirectories are only visited if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if os.path.isdir(root):
            pending = [root]
            while pending:
                curdir = pending.pop()
                for name, kind, st in osutil.listdir(curdir, stat=True):
                    full = curdir + '/' + name
                    if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                        # store-relative name of the file
                        encoded = util.pconvert(full[striplen:])
                        found.append((decodedir(encoded), encoded,
                                      st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''returns the names to copy for this store layout'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
297 296
class encodedstore(basicstore):
    '''store below <path>/store whose filenames pass through encodefilename'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        unencoded is None if the stored name cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''returns the path of f below the store, with its name encoded'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''returns the names to copy for this store layout'''
        instore = ['store/' + name for name in _data.split()]
        return ['requires', '00changelog.i'] + instore
320 319
class fncache(object):
    '''lazily loaded set of the file names recorded in the fncache file'''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None until the fncache file has been read by _load()
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A nonexistent file yields an empty set. Raises util.Abort if the
        file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # re-read line by line to report where the bad entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file even when aborting or when reading fails
            fp.close()

    def _write(self, files, atomictemp):
        '''write files to the fncache file, one encoded name per line'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file content and the entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back if any were added since loading'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''record fn, marking the cache dirty if it is a new entry'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
378 377
class _fncacheopener(scmutil.abstractopener):
    '''opener that encodes paths and records written data/ files in fncache'''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        # any mode other than plain reading may create the file
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
397 396
398 397 class fncachestore(basicstore):
399 398 def __init__(self, path, openertype, encode):
400 399 self.encode = encode
401 400 self.path = path + '/store'
402 401 self.pathsep = self.path + '/'
403 402 self.createmode = _calcmode(self.path)
404 403 op = openertype(self.path)
405 404 op.createmode = self.createmode
406 405 fnc = fncache(op)
407 406 self.fncache = fnc
408 407 self.opener = _fncacheopener(op, fnc, encode)
409 408
410 409 def join(self, f):
411 410 return self.pathsep + self.encode(f)
412 411
413 412 def getsize(self, path):
414 413 return os.stat(self.pathsep + path).st_size
415 414
416 415 def datafiles(self):
417 416 rewrite = False
418 417 existing = []
419 418 for f in sorted(self.fncache):
420 419 ef = self.encode(f)
421 420 try:
422 421 yield f, ef, self.getsize(ef)
423 422 existing.append(f)
424 423 except OSError, err:
425 424 if err.errno != errno.ENOENT:
426 425 raise
427 426 # nonexistent entry
428 427 rewrite = True
429 428 if rewrite:
430 429 # rewrite fncache to remove nonexistent entries
431 430 # (may be caused by rollback / strip)
432 431 self.fncache.rewrite(existing)
433 432
434 433 def copylist(self):
435 434 d = ('data dh fncache phaseroots obsstore'
436 435 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
437 436 return (['requires', '00changelog.i'] +
438 437 ['store/' + f for f in d.split()])
439 438
440 439 def write(self):
441 440 self.fncache.write()
442 441
def store(requirements, path, openertype):
    '''
    Return the store object matching the repository requirements.

    Without 'store' a basicstore is used, with 'store' only an
    encodedstore, and with 'store' plus 'fncache' a fncachestore whose
    encoder honours the 'dotencode' requirement.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # requirements is consulted on every call, not captured once
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now