##// END OF EJS Templates
store: add a fallback _pathencode Python function...
Adrian Buehlmann -
r17624:ae103510 default
parent child Browse files
Show More
@@ -1,490 +1,494
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util, parsers
10 10 import os, stat, errno
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def _encodedir(path):
    '''Escape directory names that look like store file names.

    Any occurrence of '.hg/', '.i/' or '.d/' in path gets an extra '.hg'
    inserted before the slash.  '.hg/' is handled first so that the
    suffixes inserted for '.i/' and '.d/' are not escaped a second time.

    >>> _encodedir('data/foo.i')
    'data/foo.i'
    >>> _encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> _encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    '''
    for suffix in (".hg/", ".i/", ".d/"):
        # '.i/' -> '.i.hg/' and so on; the order of the tuple matters
        path = path.replace(suffix, suffix[:-1] + ".hg/")
    return path
31 31
32 32 encodedir = getattr(parsers, 'encodedir', _encodedir)
33 33
def decodedir(path):
    '''Reverse the directory escaping done by _encodedir.

    Paths that do not contain '.hg/' are returned unchanged.  Otherwise
    '.d.hg/', '.i.hg/' and '.hg.hg/' are replaced, in that order, by
    '.d/', '.i/' and '.hg/'.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if ".hg/" in path:
        unescape = ((".d.hg/", ".d/"),
                    (".i.hg/", ".i/"),
                    (".hg.hg/", ".hg/"))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
49 49
def _buildencodefun():
    '''Build the reversible per-character filename encoder and decoder.

    Returns a pair (encode, decode).  encode maps every character of its
    argument through a table in which control characters, characters with
    code 126 and above and the characters \\:*?"<>| become '~xx' (two
    digit lowercase hex code), uppercase ASCII letters and '_' become '_'
    followed by the lowercased character, and everything else is kept.
    decode inverts that mapping and raises KeyError on input that is not
    a valid encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    tilded = list(range(32)) + list(range(126, 256))
    tilded.extend([ord(c) for c in '\\:*?"<>|'])
    for code in tilded:
        cmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()
    # the encoding is injective, so the table can simply be inverted
    dmap = dict([(encoded, raw) for raw, encoded in cmap.items()])

    def encode(s):
        return ''.join([cmap[c] for c in s])

    def decode(s):
        pieces = []
        pos = 0
        end = len(s)
        while pos < end:
            # encoded tokens are 1 (plain), 2 ('_x') or 3 ('~xx') chars long
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    pieces.append(dmap[token])
                    pos += width
                    break
            else:
                raise KeyError
        return ''.join(pieces)

    return encode, decode

_encodefname, _decodefname = _buildencodefun()
100 100
def encodefilename(s):
    '''Apply encodedir to s, then the per-character encoding _encodefname.

    >>> encodefilename('foo.i/bar.d/bla.hg/hi:world?/HELLO')
    'foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o'
    '''
    direncoded = encodedir(s)
    return _encodefname(direncoded)
107 107
def decodefilename(s):
    '''Undo encodefilename: apply _decodefname to s, then decodedir.

    >>> decodefilename('foo.i.hg/bar.d.hg/bla.hg.hg/hi~3aworld~3f/_h_e_l_l_o')
    'foo.i/bar.d/bla.hg/hi:world?/HELLO'
    '''
    chardecoded = _decodefname(s)
    return decodedir(chardecoded)
114 114
def _buildlowerencodefun():
    '''Build the non-reversible lowercasing filename encoder.

    The returned function maps control characters, characters with code
    126 and above and the characters \\:*?"<>| to '~xx' (two digit
    lowercase hex code), lowercases uppercase ASCII letters and keeps all
    other characters.

    >>> f = _buildlowerencodefun()
    >>> f('nothing/special.txt')
    'nothing/special.txt'
    >>> f('HELLO')
    'hello'
    >>> f('hello:world?')
    'hello~3aworld~3f'
    >>> f('the\x07quick\xADshot')
    'the~07quick~adshot'
    '''
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    tilded = list(range(32)) + list(range(126, 256))
    tilded.extend([ord(c) for c in '\\:*?"<>|'])
    for code in tilded:
        cmap[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        cmap[chr(code)] = chr(code).lower()

    def lowerenc(s):
        return "".join([cmap[c] for c in s])

    return lowerenc

lowerencode = _buildlowerencodefun()
136 136
# Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
_winres3 = ('aux', 'con', 'prn', 'nul') # length 3
_winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
def _auxencode(path, dotencode):
    '''
    Masks path segments that Windows cannot store as given.

    path is a list of path segments, assumed to be all lowercase; it is
    modified in place and also returned.  Empty segments are skipped.
    For every other segment:

    - if dotencode is True and the segment starts with a period or a
      space, that first character is replaced by its '~xx' code;
    - otherwise, if the part before the first period is a Windows
      reserved name ('aux', 'con', 'prn', 'nul', 'com1'..'com9',
      'lpt1'..'lpt9'), its third character is replaced by its '~xx'
      code.  A reserved name appearing only after a period (as in
      "foo.aux") is left alone;
    - finally, a trailing period or space is replaced by its '~xx' code.

    Other single reserved characters (c in '\\:*?"<>|' or ord(c) <= 31)
    are *not* encoded here.

    >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
    >>> _auxencode(s.split('/'), True)
    ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
    >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
    >>> _auxencode(s.split('/'), False)
    ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
    >>> _auxencode(['foo. '], True)
    ['foo.~20']
    >>> _auxencode([' .foo'], True)
    ['~20.foo']
    '''
    for idx, original in enumerate(path):
        if not original:
            continue
        seg = original
        if dotencode and seg[0] in '. ':
            seg = "~%02x" % ord(seg[0]) + seg[1:]
        else:
            # only the part before the first period can be a reserved name
            base = seg.split('.', 1)[0]
            if len(base) == 3:
                reserved = base in _winres3
            elif len(base) == 4:
                reserved = '1' <= base[3] <= '9' and base[:3] in _winres4
            else:
                reserved = False
            if reserved:
                # encode third letter ('aux' -> 'au~78')
                seg = seg[:2] + "~%02x" % ord(seg[2]) + seg[3:]
        if seg[-1] in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            seg = seg[:-1] + "~%02x" % ord(seg[-1])
        if seg != original:
            path[idx] = seg
    return path
183 183
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4

def _hashencode(path, dotencode):
    '''Return the non-reversible hashed store name for path.

    The result is 'dh/' + shortened directories + filler + sha digest of
    path + extension of the basename, where:

    - the first component of path is dropped, and the remaining ones are
      lowerencoded and passed through _auxencode with dotencode;
    - every directory level is cut to _dirprefixlen characters (a
      trailing period or space becomes '_'), and levels are added only
      as long as the joined directories fit in _maxshortdirslen;
    - the filler is the beginning of the basename, as long as fits
      without the result exceeding _maxstorepathlen characters.
    '''
    digest = _sha(path).hexdigest()
    segments = _auxencode(lowerencode(path).split('/')[1:], dotencode)
    basename = segments[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    used = 0
    for segment in segments[:-1]:
        short = segment[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        if used == 0:
            total = len(short)
        else:
            # account for the '/' separating this level from the previous
            total = used + 1 + len(short)
            if total > _maxshortdirslen:
                break
        shortdirs.append(short)
        used = total
    prefix = 'dh/' + ''.join([d + '/' for d in shortdirs])
    res = prefix + digest + ext
    room = _maxstorepathlen - len(res)
    if room > 0:
        res = prefix + basename[:room] + digest + ext
    return res
218 218
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Default encoding (reversible):

    The path is first passed through encodedir.  Uppercase letters 'X'
    are then encoded as '_x', and reserved or illegal characters as
    '~xx', where xx is the two digit hex code of the character (see
    _encodefname).  Path components that are Windows reserved filenames
    are masked by encoding their third character ('aux' -> 'au~78', see
    _auxencode); dotencode is handed on to _auxencode.

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, the
    result of _hashencode is returned instead.  That encoding uses up to
    _dirprefixlen characters of each directory level of the lowerencoded
    path, but not more levels than fit into _maxshortdirslen.  Then
    follows the filler and the sha digest of the full path.  The filler
    is the beginning of the basename of the lowerencoded path (the
    basename is everything after the last path separator); it is as long
    as possible, filling in characters from the basename until the
    encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).  The extension (e.g. '.i' or '.d') is
    preserved, and the first path component (e.g. 'data') is replaced
    with 'dh'.
    '''
    path = encodedir(path)
    segments = _encodefname(path).split('/')
    encoded = '/'.join(_auxencode(segments, dotencode))
    if len(encoded) <= _maxstorepathlen:
        return encoded
    return _hashencode(path, dotencode)
256 256
def _pathencode(path):
    '''Pure Python fallback for the reversible, dot-encoding path encoder.

    Returns the default-encoded path (encodedir, _encodefname and
    _auxencode with dotencode=True), or None if that encoding is longer
    than _maxstorepathlen characters.
    '''
    ef = _encodefname(encodedir(path)).split('/')
    res = '/'.join(_auxencode(ef, True))
    if len(res) > _maxstorepathlen:
        return None
    return res

# use the pathencode provided by the parsers module when it has one;
# otherwise keep the Python fallback defined above
_pathencode = getattr(parsers, 'pathencode', _pathencode)

def _dothybridencode(f):
    '''Encode f with dotencode enabled.

    Uses the result of _pathencode when there is one; when _pathencode
    returns None, the hashed encoding of the dir-encoded path is
    returned instead.
    '''
    ef = _pathencode(f)
    if ef is None:
        return _hashencode(encodedir(f), True)
    return ef

def _plainhybridencode(f):
    '''Encode f with _hybridencode, dotencode disabled.'''
    return _hybridencode(f, False)
270 274
def _calcmode(path):
    '''Return the mode to create files with, or None.

    The mode is taken from os.stat(path); files in .hg/ will be created
    using this mode.  None is returned when path cannot be stat'ed, or
    when the permission bits of path already equal what util.umask
    yields, which avoids some useless chmods.
    '''
    try:
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all user/group/other permission bits (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    if (mode & permbits) == (~util.umask & permbits):
        return None
    return mode
281 285
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # file names only get their directory parts escaped in this store
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the store path of f, with encodedir applied to f'''
        return '/'.join([self.path, encodedir(f)])

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size)

        Lists the regular files ending in '.d' or '.i' below relpath
        (relative to the store path; the store itself if relpath is
        empty).  Subdirectories are only entered if recurse is true.
        '''
        root = self.path
        if relpath:
            root = root + '/' + relpath
        # reported names are relative to the store path
        prefixlen = len(self.path) + 1
        found = []
        pending = []
        if os.path.isdir(root):
            pending.append(root)
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[prefixlen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns the _walk entries for everything below 'data' '''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        for entry in reversed(toplevel):
            yield entry

    def copylist(self):
        '''returns 'requires' followed by the names listed in _data'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''does nothing in this class'''
        pass
335 339
class encodedstore(basicstore):
    '''store under <path>/store whose file names go through encodefilename'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        unencoded is None for names that decodefilename cannot decode.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the store path of f, with encodefilename applied to f'''
        return '/'.join([self.path, encodefilename(f)])

    def copylist(self):
        '''returns the top-level names plus the _data names under store/'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append('store/' + f)
        return names
358 362
class fncache(object):
    '''set of file names kept in the 'fncache' file of a store

    The file is read lazily on first use (add, membership test or
    iteration) and written back by write() only if entries were added.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None until the fncache file has been loaded
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file yields an empty set.  Raises util.Abort if the
        file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # make sure the file is closed on the abort path as well
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one per line, to the fncache file

        atomictemp is handed on to the opener.  Nothing is written to
        the newly opened file if files is empty.
        '''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file contents and the entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries to the fncache file if any were added'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, loading the file first if needed'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
416 420
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that records written data files in the fncache

    Paths are passed through the given encode function before being
    handed to the wrapped opener.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        # anything but a plain read of a file under data/ is recorded
        writing = mode not in ('r', 'rb')
        if writing and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
435 439
436 440 class fncachestore(basicstore):
437 441 def __init__(self, path, openertype, dotencode):
438 442 if dotencode:
439 443 encode = _dothybridencode
440 444 else:
441 445 encode = _plainhybridencode
442 446 self.encode = encode
443 447 self.path = path + '/store'
444 448 self.pathsep = self.path + '/'
445 449 self.createmode = _calcmode(self.path)
446 450 op = openertype(self.path)
447 451 op.createmode = self.createmode
448 452 fnc = fncache(op)
449 453 self.fncache = fnc
450 454 self.opener = _fncacheopener(op, fnc, encode)
451 455
452 456 def join(self, f):
453 457 return self.pathsep + self.encode(f)
454 458
455 459 def getsize(self, path):
456 460 return os.stat(self.pathsep + path).st_size
457 461
458 462 def datafiles(self):
459 463 rewrite = False
460 464 existing = []
461 465 for f in sorted(self.fncache):
462 466 ef = self.encode(f)
463 467 try:
464 468 yield f, ef, self.getsize(ef)
465 469 existing.append(f)
466 470 except OSError, err:
467 471 if err.errno != errno.ENOENT:
468 472 raise
469 473 # nonexistent entry
470 474 rewrite = True
471 475 if rewrite:
472 476 # rewrite fncache to remove nonexistent entries
473 477 # (may be caused by rollback / strip)
474 478 self.fncache.rewrite(existing)
475 479
476 480 def copylist(self):
477 481 d = ('data dh fncache phaseroots obsstore'
478 482 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
479 483 return (['requires', '00changelog.i'] +
480 484 ['store/' + f for f in d.split()])
481 485
482 486 def write(self):
483 487 self.fncache.write()
484 488
def store(requirements, path, openertype):
    '''Return the store object matching the repository requirements.

    Without 'store' in requirements a basicstore is returned; with
    'store' but without 'fncache' an encodedstore; with both a
    fncachestore, whose dotencode flag tells whether 'dotencode' is in
    requirements.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)
    return fncachestore(path, openertype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now