##// END OF EJS Templates
store: reorder basename assignment in _hybridencode
Adrian Buehlmann -
r17587:5fb8cf6f default
parent child Browse files
Show More
@@ -1,443 +1,443 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''
    Escape directory names that end in '.hg', '.i' or '.d'.

    Only occurrences directly followed by a '/' (i.e. directory
    components) are touched; '.hg' is appended to each of them.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    # '.hg/' has to be escaped first, otherwise the '.hg/' produced by the
    # two later substitutions would be escaped a second time
    for marker in ('.hg/', '.i/', '.d/'):
        path = path.replace(marker, marker[:-1] + '.hg/')
    return path
29 29
def decodedir(path):
    '''
    Undo the directory escaping applied by encodedir.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # every escaped form contains '.hg/', so paths without it are returned
    # untouched
    if '.hg/' in path:
        # '.hg.hg/' is unescaped last, mirroring the order used by encodedir
        for escaped, plain in (('.d.hg/', '.d/'),
                               ('.i.hg/', '.i/'),
                               ('.hg.hg/', '.hg/')):
            path = path.replace(escaped, plain)
    return path
45 45
def _buildencodefun():
    '''
    Build the reversible filename encoder and its decoder.

    Returns a pair (encode, decode).  encode first applies encodedir and
    then maps every character through a table in which uppercase letters
    and '_' become '_' + lowercase letter, and control characters,
    characters from 126 upwards and Windows reserved characters become
    '~xx' (two digit hex code).  decode reverses the table and then applies
    decodedir; it raises KeyError on input that the table cannot decode.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    e = '_'
    winreserved = [ord(x) for x in '\\:*?"<>|']
    # start from the identity mapping, then override the special cases
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    # list() keeps the concatenation valid when range() is not a list
    for x in list(range(32)) + list(range(126, 256)) + winreserved:
        cmap[chr(x)] = "~%02x" % x
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    # encoded tokens are unique, so the table can simply be inverted
    dmap = dict([(v, k) for k, v in cmap.items()])

    def decode(s):
        # encoded tokens are 1 ('a'), 2 ('_a') or 3 ('~xx') characters long;
        # try the shortest match first
        i = 0
        while i < len(s):
            for l in (1, 2, 3):
                token = s[i:i + l]
                if token in dmap:
                    yield dmap[token]
                    i += l
                    break
            else:
                raise KeyError

    return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
            lambda s: decodedir("".join(list(decode(s)))))

encodefilename, decodefilename = _buildencodefun()
96 96
def _buildlowerencodefun():
    '''
    Build the non-reversible lowercasing filename encoder.

    The returned function maps uppercase letters to lowercase and encodes
    control characters, characters from 126 upwards and Windows reserved
    characters as '~xx' (two digit hex code).  All other characters are
    kept as they are.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    winreserved = [ord(x) for x in '\\:*?"<>|']
    # start from the identity mapping, then override the special cases
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    # list() keeps the concatenation valid when range() is not a list
    for x in list(range(32)) + list(range(126, 256)) + winreserved:
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])

lowerencode = _buildlowerencodefun()
118 118
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = ('aux', 'con', 'prn', 'nul') # length 3
_winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Split path at '/' and mask segments that Windows cannot handle.

    A segment is masked if its part before the first period is a Windows
    reserved name (e.g. "aux", "aux.foo", "com1.txt"), or if it ends in a
    period or space.  A directory or file named "foo.aux" is left alone.
    If dotencode is True, a period or space at the beginning of a segment
    is encoded as well (and the reserved name check is skipped for that
    segment).  Other reserved characters, i.e. c in '\\:*?"<>|' or
    ord(c) <= 31, are *not* encoded here.  Parameter path is assumed to be
    all lowercase.  Returns the list of resulting segments.

    >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode('foo. ', True)
    ['foo.~20']
    >>> _auxencode(' .foo', True)
    ['~20.foo']
    '''
    segments = path.split('/')
    for idx, seg in enumerate(segments):
        if not seg:
            # empty segments (e.g. from '//') are passed through
            continue
        if dotencode and seg[0] in '. ':
            seg = "~%02x" % ord(seg[0]) + seg[1:]
            segments[idx] = seg
        else:
            # length of the part in front of the first period
            baselen = seg.find('.')
            if baselen == -1:
                baselen = len(seg)
            stem = seg[:3]
            reserved = baselen == 3 and stem in _winres3
            if not reserved:
                reserved = (baselen == 4 and '1' <= seg[3] <= '9'
                            and stem in _winres4)
            if reserved:
                # encode third letter ('aux' -> 'au~78')
                seg = seg[:2] + "~%02x" % ord(seg[2]) + seg[3:]
                segments[idx] = seg
        if seg[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            segments[idx] = seg[:-1] + "~%02x" % ord(seg[-1])
    return segments
164 164
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, auxencode):
    '''encodes path with a length limit

    Callers are expected to pass paths that begin with 'data/'; the first
    path component is dropped when the hashed encoding is used.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    encoded = '/'.join(auxencode(encodefilename(path)))
    if len(encoded) <= _maxstorepathlen:
        return encoded

    # too long for the reversible form: fall back to the hashed encoding
    path = encodedir(path)
    digest = _sha(path).hexdigest()
    # drop the leading component ('data'), it is replaced by 'dh' below
    parts = auxencode(lowerencode(path))[1:]
    basename = parts[-1]
    _root, ext = os.path.splitext(basename)

    shortdirs = []
    for level in parts[:-1]:
        prefix = level[:_dirprefixlen]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        # same length as '/'.join(shortdirs) + '/' + prefix; the separator
        # is counted even in front of the very first directory
        joinedlen = len('/'.join(shortdirs)) + 1 + len(prefix)
        if joinedlen > _maxshortdirslen:
            break
        shortdirs.append(prefix)
    dirs = ''.join([d + '/' for d in shortdirs])

    hashed = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(hashed)
    if spaceleft > 0:
        # use the remaining room for the beginning of the basename
        hashed = 'dh/' + dirs + basename[:spaceleft] + digest + ext
    return hashed
225 225
def _calcmode(path):
    '''
    Return the mode to use for files created below path, or None.

    None is returned when path cannot be stat'ed, or when the permission
    bits of path already equal what the current umask would produce, so
    that no explicit chmod is needed.
    '''
    # all rwx permission bits (octal 777), spelled with the stat constants
    # so that no version specific octal literal is needed
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
236 236
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the store path of f, with directory names escaped'''
        return '/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are visited only if recurse is true.
        '''
        top = self.path
        if relpath:
            top += '/' + relpath
        if not os.path.isdir(top):
            return []
        # strip the store path and the following '/' from reported names
        striplen = len(self.path) + 1
        found = []
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''returns the names to copy: 'requires' plus the store entries'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
290 290
class encodedstore(basicstore):
    '''store living in <path>/store whose names go through encodefilename'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        The unencoded name is None if the name found on disk cannot be
        decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the store path of the encoded form of f'''
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        '''returns the names to copy, store entries prefixed by store/'''
        storefiles = ['store/' + name for name in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
313 313
class fncache(object):
    '''set of the store file names recorded in the 'fncache' file

    The entries are loaded lazily on first use and written back by
    write() only if something was added.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None means "not loaded yet"
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set.  Aborts if the file contains
        an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # rescan the file to report the offending line number
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # release the handle on the abort path and on read errors too
            fp.close()

    def _write(self, files, atomictemp):
        '''write files (directory-escaped, one per line) to fncache'''
        # NOTE(review): fp is not closed if write() fails; with atomictemp
        # the opener's close() presumably publishes the file, so closing a
        # partial write may be undesirable - confirm before changing
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file content and the loaded entries by files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back if something was added'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''record fn, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
371 371
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that records written data/ files in the fncache'''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        '''open the encoded form of path with the wrapped opener'''
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # anything opened for writing below data/ gets recorded
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
390 390
class fncachestore(basicstore):
    '''store living in <path>/store that tracks its data files in fncache

    File names are translated with the encode function passed in.
    '''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        self.path = path + '/store'
        self.pathsep = self.path + '/'
        self.createmode = _calcmode(self.path)
        op = openertype(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc
        self.opener = _fncacheopener(op, fnc, encode)

    def join(self, f):
        '''return the store path of the encoded form of f'''
        return self.pathsep + self.encode(f)

    def getsize(self, path):
        '''return the size of the (already encoded) store file path'''
        return os.stat(self.pathsep + path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries

        Entries whose file no longer exists are skipped; if there were
        any, the fncache is rewritten without them once all entries have
        been visited.
        '''
        rewrite = False
        existing = []
        for f in sorted(self.fncache):
            ef = self.encode(f)
            try:
                yield f, ef, self.getsize(ef)
                existing.append(f)
            except OSError as err:
                if err.errno != errno.ENOENT:
                    raise
                # nonexistent entry
                rewrite = True
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''returns the names to copy, store entries prefixed by store/'''
        d = ('data dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self):
        '''write pending fncache additions'''
        self.fncache.write()
435 435
def store(requirements, path, openertype):
    '''return the store object matching the repository requirements

    Without 'store' a basicstore is used, with 'store' but without
    'fncache' an encodedstore, otherwise a fncachestore.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # 'dotencode' is looked up on every call, not once up front
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now