##// END OF EJS Templates
store: remove unneeded startswith('data/') check in _hybridencode()
Adrian Buehlmann -
r17585:8ed2783f default
parent child Browse files
Show More
@@ -1,447 +1,445 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
# hash constructor taken from util; _hybridencode() uses its hexdigest to
# build the non-reversible names of over-long store paths
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''Escape directory names below 'data/' that end in .hg, .i or .d.

    Every occurrence of '.hg/', '.i/' or '.d/' inside a path starting with
    'data/' gets an additional '.hg' inserted in front of the slash. Paths
    that do not start with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if path.startswith('data/'):
        # '.hg/' has to be handled first: the '.hg' added for '.i/' and
        # '.d/' afterwards must not be escaped a second time
        for suffix in ('.hg/', '.i/', '.d/'):
            path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
31 31
def decodedir(path):
    '''Undo the directory escaping done by encodedir().

    Only paths that start with 'data/' and contain '.hg/' are rewritten;
    everything else is returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if path.startswith('data/') and ".hg/" in path:
        # reverse order of encodedir(): '.hg.hg/' is unescaped last
        unescape = ((".d.hg/", ".d/"),
                    (".i.hg/", ".i/"),
                    (".hg.hg/", ".hg/"))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
47 47
def _buildencodefun():
    '''Build the reversible filename encoder and the matching decoder.

    Returns an (encode, decode) pair. encode() runs its argument through
    encodedir() and then maps every character through a table in which
    uppercase letters and '_' become '_' plus the lowercase character, and
    control characters, characters from 126 up and the Windows reserved
    characters become '~xx' (two hex digits). decode() reverses the table
    and then applies decodedir(); it raises KeyError if the input contains
    something that is not a valid encoded token.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']

    # start with the identity mapping, then override what needs escaping
    encmap = {}
    for code in range(127):
        encmap[chr(code)] = chr(code)
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        encmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        encmap[chr(code)] = escape + chr(code).lower()

    decmap = dict([(token, char) for char, token in encmap.items()])

    def decodechars(s):
        pos = 0
        while pos < len(s):
            # encoded tokens are one to three characters long
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in decmap:
                    yield decmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encode(s):
        return "".join([encmap[c] for c in encodedir(s)])

    def decode(s):
        return decodedir("".join(decodechars(s)))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
98 98
def _buildlowerencodefun():
    '''Build the lowercasing, non-reversible filename encoder.

    The returned function maps uppercase ASCII letters to lowercase and
    escapes control characters, characters from 126 up and the Windows
    reserved characters as '~xx' (two hex digits).

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    table = {}
    for code in range(127):
        table[chr(code)] = chr(code)

    escaped = list(range(32)) + list(range(126, 256))
    escaped.extend([ord(c) for c in '\\:*?"<>|'])
    for code in escaped:
        table[chr(code)] = "~%02x" % code

    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def encode(s):
        return "".join([table[c] for c in s])
    return encode

lowerencode = _buildlowerencodefun()
120 120
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = ('aux', 'con', 'prn', 'nul') # length 3
_winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Masks path segments that Windows cannot handle and returns the list of
    (possibly rewritten) segments of path.

    A segment is rewritten if the part in front of its first period is a
    Windows reserved name (e.g. "aux", "aux.foo"; "foo.aux" is left alone),
    or if it ends in a period or space. If dotencode is True, a leading
    period or space is encoded as well. Other reserved characters c,
    specifically c in '\\:*?"<>|' or ord(c) <= 31, are *not* encoded here.
    Parameter path is assumed to be all lowercase.

    >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.', False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode('foo. ', True)
    ['foo.~20']
    >>> _auxencode(' .foo', True)
    ['~20.foo']
    '''
    segments = path.split('/')
    for idx, seg in enumerate(segments):
        if not seg:
            continue
        if dotencode and seg[0] in '. ':
            # a segment with an encoded first character starts with '~',
            # so it cannot be a reserved name any more
            seg = "~%02x" % ord(seg[0]) + seg[1:]
            segments[idx] = seg
        else:
            base = seg.split('.', 1)[0]
            reserved = (base in _winres3 or
                        (len(base) == 4 and base[:3] in _winres4
                         and '1' <= base[3] <= '9'))
            if reserved:
                # encode third letter ('aux' -> 'au~78')
                seg = seg[:2] + "~%02x" % ord(seg[2]) + seg[3:]
                segments[idx] = seg
        if seg[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            segments[idx] = seg[:-1] + "~%02x" % ord(seg[-1])
    return segments
166 166
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, auxencode):
    '''encodes path with a length limit

    path is the unencoded store path; auxencode is a function returning
    the list of path segments with Windows reserved names masked (see
    _auxencode).

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    The first path component is dropped and replaced with 'dh/'.
    Then follow up to _dirprefixlen characters of each remaining directory
    level of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.
    '''
    encoded = '/'.join(auxencode(encodefilename(path)))
    if len(encoded) <= _maxstorepathlen:
        return encoded

    # too long for the reversible form: fall back to the hashed form
    path = encodedir(path)
    digest = _sha(path).hexdigest()
    # [1:] drops the leading component, 'dh/' takes its place below
    parts = auxencode(lowerencode(path))[1:]
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]

    shortdirs = []
    for level in parts[:-1]:
        prefix = level[:_dirprefixlen]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        candidate = '/'.join(shortdirs) + '/' + prefix
        if len(candidate) > _maxshortdirslen:
            break
        shortdirs.append(prefix)

    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'

    hashed = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(hashed)
    if spaceleft > 0:
        # use the remaining room for the start of the basename
        hashed = 'dh/' + dirs + basename[:spaceleft] + digest + ext
    return hashed
229 227
def _calcmode(path):
    '''Return the mode to create files under path with, or None.

    None is returned if path cannot be stat'ed, or if the permission bits
    of path already equal what util.umask yields, so that no explicit
    chmod is needed.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
240 238
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # every file name passes through encodedir() before it is opened
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Lists the regular files ending in '.d' or '.i' found in the
        directory relpath (relative to self.path), descending into
        subdirectories only if recurse is true.
        '''
        top = self.path
        if relpath:
            top += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if not os.path.isdir(top):
            return found
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in self._walk('', False)[::-1]:
            yield entry

    def copylist(self):
        '''return the list of names built from 'requires' and _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write for this store type'''
        pass
294 292
class encodedstore(basicstore):
    '''store below <path>/store whose file names pass through
    encodefilename()'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for a file whose name cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return 'requires' and '00changelog.i' plus the _data names,
        each of the latter prefixed with 'store/' '''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
317 315
class fncache(object):
    '''set of file names backed by the 'fncache' file of a store

    The entries are loaded lazily on first use and written back by
    write() only if an entry has been added.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file'''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        lines = fp.read().splitlines()
        self.entries = set([decodedir(line) for line in lines])
        if '' in self.entries:
            # an empty line is invalid: read again to report its number
            fp.seek(0)
            for n, line in enumerate(fp):
                if not line.rstrip('\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
        fp.close()

    def _write(self, files, atomictemp):
        '''write files, one encoded name per line, to the fncache file'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            encoded = [encodedir(f) for f in files]
            fp.write('\n'.join(encoded) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file contents and the entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if any were added since load/write'''
        if not self._dirty:
            return
        self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn in self.entries:
            return
        self._dirty = True
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
375 373
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that encodes paths and records written data files

    op is the wrapped opener, fnc the fncache receiving the names and
    encode the function applied to every path before it is opened.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    # mustaudit is forwarded to the wrapped opener
    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # remember files below data/ that get opened for writing
            self.fncache.add(path)
        return self.opener(self.encode(path), mode, *args, **kw)
394 392
395 393 class fncachestore(basicstore):
396 394 def __init__(self, path, openertype, encode):
397 395 self.encode = encode
398 396 self.path = path + '/store'
399 397 self.pathsep = self.path + '/'
400 398 self.createmode = _calcmode(self.path)
401 399 op = openertype(self.path)
402 400 op.createmode = self.createmode
403 401 fnc = fncache(op)
404 402 self.fncache = fnc
405 403 self.opener = _fncacheopener(op, fnc, encode)
406 404
407 405 def join(self, f):
408 406 return self.pathsep + self.encode(f)
409 407
410 408 def getsize(self, path):
411 409 return os.stat(self.pathsep + path).st_size
412 410
413 411 def datafiles(self):
414 412 rewrite = False
415 413 existing = []
416 414 for f in sorted(self.fncache):
417 415 ef = self.encode(f)
418 416 try:
419 417 yield f, ef, self.getsize(ef)
420 418 existing.append(f)
421 419 except OSError, err:
422 420 if err.errno != errno.ENOENT:
423 421 raise
424 422 # nonexistent entry
425 423 rewrite = True
426 424 if rewrite:
427 425 # rewrite fncache to remove nonexistent entries
428 426 # (may be caused by rollback / strip)
429 427 self.fncache.rewrite(existing)
430 428
431 429 def copylist(self):
432 430 d = ('data dh fncache phaseroots obsstore'
433 431 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
434 432 return (['requires', '00changelog.i'] +
435 433 ['store/' + f for f in d.split()])
436 434
437 435 def write(self):
438 436 self.fncache.write()
439 437
def store(requirements, path, openertype):
    '''Return the store object matching the repository requirements.

    Without 'store' in requirements a basicstore is returned, with 'store'
    but without 'fncache' an encodedstore, and with both a fncachestore
    using _hybridencode(). 'dotencode' in requirements additionally makes
    the encoding mask leading periods and spaces.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # requirements is looked at on every call, not just once here
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now