##// END OF EJS Templates
store: move _plainhybridencode and _dothybridencode higher up in the file...
Adrian Buehlmann -
r17623:448e6ed7 default
parent child Browse files
Show More
@@ -1,490 +1,490 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util, parsers
10 10 import os, stat, errno
11 11
# Hash constructor used by _hashencode() to digest over-long store paths.
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 16 def _encodedir(path):
17 17 '''
18 18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
# Use the parsers module's encodedir when it provides one; otherwise fall
# back to the pure Python _encodedir defined in this file.
encodedir = getattr(parsers, 'encodedir', _encodedir)
33 33
def decodedir(path):
    '''
    Strip the ".hg" suffix that directory encoding appended to directory
    names ending in ".d", ".i" or ".hg".

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        # ".hg.hg/" is unescaped last so that the ".hg/" endings it yields
        # are not mistaken for escaped ".d"/".i" directories.
        for escaped, plain in ((".d.hg/", ".d/"),
                               (".i.hg/", ".i/"),
                               (".hg.hg/", ".hg/")):
            path = path.replace(escaped, plain)
    return path
49 49
50 50 def _buildencodefun():
51 51 '''
52 52 >>> enc, dec = _buildencodefun()
53 53
54 54 >>> enc('nothing/special.txt')
55 55 'nothing/special.txt'
56 56 >>> dec('nothing/special.txt')
57 57 'nothing/special.txt'
58 58
59 59 >>> enc('HELLO')
60 60 '_h_e_l_l_o'
61 61 >>> dec('_h_e_l_l_o')
62 62 'HELLO'
63 63
64 64 >>> enc('hello:world?')
65 65 'hello~3aworld~3f'
66 66 >>> dec('hello~3aworld~3f')
67 67 'hello:world?'
68 68
69 69 >>> enc('the\x07quick\xADshot')
70 70 'the~07quick~adshot'
71 71 >>> dec('the~07quick~adshot')
72 72 'the\\x07quick\\xadshot'
73 73 '''
74 74 e = '_'
75 75 winreserved = [ord(x) for x in '\\:*?"<>|']
76 76 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
77 77 for x in (range(32) + range(126, 256) + winreserved):
78 78 cmap[chr(x)] = "~%02x" % x
79 79 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
80 80 cmap[chr(x)] = e + chr(x).lower()
81 81 dmap = {}
82 82 for k, v in cmap.iteritems():
83 83 dmap[v] = k
84 84 def decode(s):
85 85 i = 0
86 86 while i < len(s):
87 87 for l in xrange(1, 4):
88 88 try:
89 89 yield dmap[s[i:i + l]]
90 90 i += l
91 91 break
92 92 except KeyError:
93 93 pass
94 94 else:
95 95 raise KeyError
96 96 return (lambda s: ''.join([cmap[c] for c in s]),
97 97 lambda s: ''.join(list(decode(s))))
98 98
99 99 _encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''
    Apply directory encoding first, then character encoding, to path s.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direncoded = encodedir(s)
    return _encodefname(direncoded)
107 107
def decodefilename(s):
    '''
    Inverse of encodefilename: undo character encoding, then directory
    encoding. A KeyError from the character decoder is not caught here.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
114 114
115 115 def _buildlowerencodefun():
116 116 '''
117 117 >>> f = _buildlowerencodefun()
118 118 >>> f('nothing/special.txt')
119 119 'nothing/special.txt'
120 120 >>> f('HELLO')
121 121 'hello'
122 122 >>> f('hello:world?')
123 123 'hello~3aworld~3f'
124 124 >>> f('the\x07quick\xADshot')
125 125 'the~07quick~adshot'
126 126 '''
127 127 winreserved = [ord(x) for x in '\\:*?"<>|']
128 128 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
129 129 for x in (range(32) + range(126, 256) + winreserved):
130 130 cmap[chr(x)] = "~%02x" % x
131 131 for x in range(ord("A"), ord("Z")+1):
132 132 cmap[chr(x)] = chr(x).lower()
133 133 return lambda s: "".join([cmap[c] for c in s])
134 134
135 135 lowerencode = _buildlowerencodefun()
136 136
137 137 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
138 138 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
139 139 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
140 140 def _auxencode(path, dotencode):
141 141 '''
142 142 Encodes filenames containing names reserved by Windows or which end in
143 143 period or space. Does not touch other single reserved characters c.
144 144 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
145 145 Additionally encodes space or period at the beginning, if dotencode is
146 146 True. Parameter path is assumed to be all lowercase.
147 147 A segment only needs encoding if a reserved name appears as a
148 148 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
149 149 doesn't need encoding.
150 150
151 151 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
152 152 >>> _auxencode(s.split('/'), True)
153 153 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
154 154 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
155 155 >>> _auxencode(s.split('/'), False)
156 156 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
157 157 >>> _auxencode(['foo. '], True)
158 158 ['foo.~20']
159 159 >>> _auxencode([' .foo'], True)
160 160 ['~20.foo']
161 161 '''
162 162 for i, n in enumerate(path):
163 163 if not n:
164 164 continue
165 165 if dotencode and n[0] in '. ':
166 166 n = "~%02x" % ord(n[0]) + n[1:]
167 167 path[i] = n
168 168 else:
169 169 l = n.find('.')
170 170 if l == -1:
171 171 l = len(n)
172 172 if ((l == 3 and n[:3] in _winres3) or
173 173 (l == 4 and n[3] <= '9' and n[3] >= '1'
174 174 and n[:3] in _winres4)):
175 175 # encode third letter ('aux' -> 'au~78')
176 176 ec = "~%02x" % ord(n[2])
177 177 n = n[0:2] + ec + n[3:]
178 178 path[i] = n
179 179 if n[-1] in '. ':
180 180 # encode last period or space ('foo...' -> 'foo..~2e')
181 181 path[i] = n[:-1] + "~%02x" % ord(n[-1])
182 182 return path
183 183
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''
    Return the non-reversible hashed encoding of path.

    The result has the form 'dh/' + shortened directories + filler + sha
    digest of path + extension of the basename. The first component of path
    is dropped. Each remaining directory level is cut to _dirprefixlen
    characters, and levels are taken only while they fit into
    _maxshortdirslen. The filler is as much of the start of the basename as
    fits within _maxstorepathlen.
    '''
    digest = _sha(path).hexdigest()
    # lower- and aux-encode everything after the first path component
    segments = _auxencode(lowerencode(path).split('/')[1:], dotencode)
    basename = segments[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    used = 0
    for seg in segments[:-1]:
        short = seg[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        if used:
            # one more character for the separating '/'
            total = used + 1 + len(short)
        else:
            total = len(short)
        if total > _maxshortdirslen:
            break
        shortdirs.append(short)
        used = total
    prefix = 'dh/' + ''.join([d + '/' for d in shortdirs])
    tail = digest + ext
    spaceleft = _maxstorepathlen - len(prefix) - len(tail)
    if spaceleft > 0:
        return prefix + basename[:spaceleft] + tail
    return prefix + tail
218 218
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead (see
    _hashencode).
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    # too long for the reversible form: hash the dir-encoded path instead
    return _hashencode(path, dotencode)
256 256
def _plainhybridencode(f):
    '''hybrid-encode f without encoding a leading period or space'''
    return _hybridencode(f, dotencode=False)
259
# pathencode implementation from the parsers module, or None if it has none
_pathencode = getattr(parsers, 'pathencode', None)
if not _pathencode:
    def _dothybridencode(f):
        '''hybrid-encode f, also encoding a leading period or space'''
        return _hybridencode(f, True)
else:
    def _dothybridencode(f):
        '''encode f with parsers.pathencode, hashing when it returns None'''
        encoded = _pathencode(f)
        if encoded is not None:
            return encoded
        # NOTE(review): None presumably means the path needs the hashed
        # encoding -- confirm against parsers.pathencode
        return _hashencode(encodedir(f), True)
270
def _calcmode(path):
    '''
    Return the st_mode of path, or None.

    None is returned when path cannot be stat'ed (OSError) and when the
    permission bits of path equal what util.umask would give anyway.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all permission bits for user, group and other (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
267 281
268 282 _data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
269 283 ' phaseroots obsstore')
270 284
271 285 class basicstore(object):
272 286 '''base class for local repository stores'''
273 287 def __init__(self, path, openertype):
274 288 self.path = path
275 289 self.createmode = _calcmode(path)
276 290 op = openertype(self.path)
277 291 op.createmode = self.createmode
278 292 self.opener = scmutil.filteropener(op, encodedir)
279 293
280 294 def join(self, f):
281 295 return self.path + '/' + encodedir(f)
282 296
283 297 def _walk(self, relpath, recurse):
284 298 '''yields (unencoded, encoded, size)'''
285 299 path = self.path
286 300 if relpath:
287 301 path += '/' + relpath
288 302 striplen = len(self.path) + 1
289 303 l = []
290 304 if os.path.isdir(path):
291 305 visit = [path]
292 306 while visit:
293 307 p = visit.pop()
294 308 for f, kind, st in osutil.listdir(p, stat=True):
295 309 fp = p + '/' + f
296 310 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
297 311 n = util.pconvert(fp[striplen:])
298 312 l.append((decodedir(n), n, st.st_size))
299 313 elif kind == stat.S_IFDIR and recurse:
300 314 visit.append(fp)
301 315 l.sort()
302 316 return l
303 317
304 318 def datafiles(self):
305 319 return self._walk('data', True)
306 320
307 321 def walk(self):
308 322 '''yields (unencoded, encoded, size)'''
309 323 # yield data files first
310 324 for x in self.datafiles():
311 325 yield x
312 326 # yield manifest before changelog
313 327 for x in reversed(self._walk('', False)):
314 328 yield x
315 329
316 330 def copylist(self):
317 331 return ['requires'] + _data.split()
318 332
319 333 def write(self):
320 334 pass
321 335
class encodedstore(basicstore):
    '''store kept in the 'store' subdirectory, with file names passed
    through encodefilename'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # every path handed to the opener is fully encoded first
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size); unencoded is None when the
        name found on disk cannot be decoded'''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the fully encoded location of f below the store path'''
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        '''return 'requires', '00changelog.i' and the names listed in _data
        prefixed with 'store/' '''
        names = ['requires', '00changelog.i']
        names.extend(['store/' + f for f in _data.split()])
        return names
344 358
class fncache(object):
    '''set of file names backed by the 'fncache' file of an opener

    entries stays None until the file is first needed; add(), "in" and
    iteration load it on demand. _dirty records whether entries has changed
    since it was last loaded or written.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) gives an empty set. An empty
        line in the file raises util.Abort naming the line number.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # close the file on every exit path, including a failing read and
        # the Abort below
        try:
            data = fp.read()
            self.entries = set(decodedir(data).splitlines())
            if '' in self.entries:
                # rescan line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one per line, to the fncache file and clear the
        dirty flag; an empty files leaves the written file empty'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file contents and the entries by files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if they have changed'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking them dirty if fn is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
402 416
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that encodes paths and records every 'data/' path
    opened in a mode other than 'r'/'rb' in the fncache'''
    def __init__(self, op, fnc, encode):
        self.opener, self.fncache, self.encode = op, fnc, encode

    # mustaudit is stored on, and read from, the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
421 435
# NOTE(review): the same definition also appears further up in this listing
# (the changeset moves it higher up in the file); this copy rebinds the name
# to an equivalent function.
def _plainhybridencode(f):
    '''hybrid-encode f without encoding a leading period or space'''
    return _hybridencode(f, dotencode=False)
424
# NOTE(review): the same definitions also appear further up in this listing
# (the changeset moves them higher up in the file); this copy rebinds the
# names to equivalent objects.
# pathencode implementation from the parsers module, or None if it has none
_pathencode = getattr(parsers, 'pathencode', None)
if not _pathencode:
    def _dothybridencode(f):
        '''hybrid-encode f, also encoding a leading period or space'''
        return _hybridencode(f, True)
else:
    def _dothybridencode(f):
        '''encode f with parsers.pathencode, hashing when it returns None'''
        encoded = _pathencode(f)
        if encoded is not None:
            return encoded
        # NOTE(review): None presumably means the path needs the hashed
        # encoding -- confirm against parsers.pathencode
        return _hashencode(encodedir(f), True)
435
436 436 class fncachestore(basicstore):
437 437 def __init__(self, path, openertype, dotencode):
438 438 if dotencode:
439 439 encode = _dothybridencode
440 440 else:
441 441 encode = _plainhybridencode
442 442 self.encode = encode
443 443 self.path = path + '/store'
444 444 self.pathsep = self.path + '/'
445 445 self.createmode = _calcmode(self.path)
446 446 op = openertype(self.path)
447 447 op.createmode = self.createmode
448 448 fnc = fncache(op)
449 449 self.fncache = fnc
450 450 self.opener = _fncacheopener(op, fnc, encode)
451 451
452 452 def join(self, f):
453 453 return self.pathsep + self.encode(f)
454 454
455 455 def getsize(self, path):
456 456 return os.stat(self.pathsep + path).st_size
457 457
458 458 def datafiles(self):
459 459 rewrite = False
460 460 existing = []
461 461 for f in sorted(self.fncache):
462 462 ef = self.encode(f)
463 463 try:
464 464 yield f, ef, self.getsize(ef)
465 465 existing.append(f)
466 466 except OSError, err:
467 467 if err.errno != errno.ENOENT:
468 468 raise
469 469 # nonexistent entry
470 470 rewrite = True
471 471 if rewrite:
472 472 # rewrite fncache to remove nonexistent entries
473 473 # (may be caused by rollback / strip)
474 474 self.fncache.rewrite(existing)
475 475
476 476 def copylist(self):
477 477 d = ('data dh fncache phaseroots obsstore'
478 478 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
479 479 return (['requires', '00changelog.i'] +
480 480 ['store/' + f for f in d.split()])
481 481
482 482 def write(self):
483 483 self.fncache.write()
484 484
def store(requirements, path, openertype):
    '''
    Create the store object matching requirements.

    Without 'store' a basicstore is returned; with 'store' but without
    'fncache' an encodedstore; with both a fncachestore, which is told
    whether 'dotencode' is among the requirements as well.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)
    return fncachestore(path, openertype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now