##// END OF EJS Templates
store: use fast C implementation of encodedir() if it's available...
Adrian Buehlmann -
r17607:cc58dc47 default
parent child Browse files
Show More
@@ -1,461 +1,463
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 import osutil, scmutil, util
9 import osutil, scmutil, util, parsers
10 10 import os, stat, errno
11 11
# hash constructor used to compute the digest of hashed store paths
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
16 def encodedir(path):
16 def _encodedir(path):
17 17 '''
18 >>> encodedir('data/foo.i')
18 >>> _encodedir('data/foo.i')
19 19 'data/foo.i'
20 >>> encodedir('data/foo.i/bla.i')
20 >>> _encodedir('data/foo.i/bla.i')
21 21 'data/foo.i.hg/bla.i'
22 >>> encodedir('data/foo.i.hg/bla.i')
22 >>> _encodedir('data/foo.i.hg/bla.i')
23 23 'data/foo.i.hg.hg/bla.i'
24 >>> encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
24 >>> _encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
25 25 'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
26 26 '''
27 27 return (path
28 28 .replace(".hg/", ".hg.hg/")
29 29 .replace(".i/", ".i.hg/")
30 30 .replace(".d/", ".d.hg/"))
31 31
# use the implementation provided by the parsers module when it has
# one, otherwise fall back to the pure Python version
try:
    encodedir = parsers.encodedir
except AttributeError:
    encodedir = _encodedir
33
def decodedir(path):
    '''Undo the directory escaping: drop the ".hg" inserted before "/".

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # fast path: nothing was escaped
    if ".hg/" not in path:
        return path
    # ".hg.hg/" is unescaped last so that the ".hg/" it produces is not
    # mistaken for part of another escape sequence
    for stem in ('.d', '.i', '.hg'):
        path = path.replace(stem + '.hg/', stem + '/')
    return path
47 49
def _buildencodefun():
    '''Build the reversible filename encoder and its decoder.

    Returns an (encode, decode) pair of functions. Uppercase letters and
    the underscore are encoded as '_' followed by the lowercased
    character; control characters, characters >= 126 and the Windows
    reserved characters are encoded as '~xx' (two digit hex code).

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    esc = '_'

    # character -> encoded form; characters below 127 start out unchanged
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    hexescaped = list(range(32)) + list(range(126, 256))
    hexescaped += [ord(c) for c in '\\:*?"<>|']
    for code in hexescaped:
        cmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(esc)]:
        cmap[chr(code)] = esc + chr(code).lower()

    # encoded form -> character
    dmap = dict([(enc, char) for char, enc in cmap.items()])

    def decode(s):
        # encoded tokens are 1 to 3 characters long; try the shortest first
        pos = 0
        end = len(s)
        while pos < end:
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # no token of any length matches at this position
                raise KeyError

    def encodename(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decodename(s):
        return decodedir("".join(list(decode(s))))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
98 100
99 101 def _buildlowerencodefun():
100 102 '''
101 103 >>> f = _buildlowerencodefun()
102 104 >>> f('nothing/special.txt')
103 105 'nothing/special.txt'
104 106 >>> f('HELLO')
105 107 'hello'
106 108 >>> f('hello:world?')
107 109 'hello~3aworld~3f'
108 110 >>> f('the\x07quick\xADshot')
109 111 'the~07quick~adshot'
110 112 '''
111 113 winreserved = [ord(x) for x in '\\:*?"<>|']
112 114 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 115 for x in (range(32) + range(126, 256) + winreserved):
114 116 cmap[chr(x)] = "~%02x" % x
115 117 for x in range(ord("A"), ord("Z")+1):
116 118 cmap[chr(x)] = chr(x).lower()
117 119 return lambda s: "".join([cmap[c] for c in s])
118 120
119 121 lowerencode = _buildlowerencodefun()
120 122
121 123 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
122 124 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
123 125 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
124 126 def _auxencode(path, dotencode):
125 127 '''
126 128 Encodes filenames containing names reserved by Windows or which end in
127 129 period or space. Does not touch other single reserved characters c.
128 130 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 131 Additionally encodes space or period at the beginning, if dotencode is
130 132 True. Parameter path is assumed to be all lowercase.
131 133 A segment only needs encoding if a reserved name appears as a
132 134 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
133 135 doesn't need encoding.
134 136
135 137 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
136 138 >>> _auxencode(s.split('/'), True)
137 139 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
138 140 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
139 141 >>> _auxencode(s.split('/'), False)
140 142 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
141 143 >>> _auxencode(['foo. '], True)
142 144 ['foo.~20']
143 145 >>> _auxencode([' .foo'], True)
144 146 ['~20.foo']
145 147 '''
146 148 for i, n in enumerate(path):
147 149 if not n:
148 150 continue
149 151 if dotencode and n[0] in '. ':
150 152 n = "~%02x" % ord(n[0]) + n[1:]
151 153 path[i] = n
152 154 else:
153 155 l = n.find('.')
154 156 if l == -1:
155 157 l = len(n)
156 158 if ((l == 3 and n[:3] in _winres3) or
157 159 (l == 4 and n[3] <= '9' and n[3] >= '1'
158 160 and n[:3] in _winres4)):
159 161 # encode third letter ('aux' -> 'au~78')
160 162 ec = "~%02x" % ord(n[2])
161 163 n = n[0:2] + ec + n[3:]
162 164 path[i] = n
163 165 if n[-1] in '. ':
164 166 # encode last period or space ('foo...' -> 'foo..~2e')
165 167 path[i] = n[:-1] + "~%02x" % ord(n[-1])
166 168 return path
167 169
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    segments = encodefilename(path).split('/')
    plain = '/'.join(_auxencode(segments, dotencode))
    if len(plain) <= _maxstorepathlen:
        return plain

    # the reversible form is too long: build the hashed form instead
    path = encodedir(path)
    digest = _sha(path).hexdigest()
    # the first path component is skipped; 'dh/' takes its place below
    parts = _auxencode(lowerencode(path).split('/')[1:], dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]

    # collect shortened directory names while they fit in _maxshortdirslen
    shortdirs = []
    used = 0
    for component in parts[:-1]:
        short = component[:_dirprefixlen]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        needed = len(short)
        if used:
            # account for the separator joining it to the previous dirs
            needed += used + 1
        if needed > _maxshortdirslen:
            break
        shortdirs.append(short)
        used = needed

    prefix = 'dh/' + ''.join([d + '/' for d in shortdirs])
    room = _maxstorepathlen - len(prefix + digest + ext)
    if room > 0:
        # pad with the start of the basename, as far as space allows
        return prefix + basename[:room] + digest + ext
    return prefix + digest + ext
235 237
236 238 def _calcmode(path):
237 239 try:
238 240 # files in .hg/ will be created using this mode
239 241 mode = os.stat(path).st_mode
240 242 # avoid some useless chmods
241 243 if (0777 & ~util.umask) == (0777 & mode):
242 244 mode = None
243 245 except OSError:
244 246 mode = None
245 247 return mode
246 248
# names of the store entries listed by copylist()
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''returns the path of f below the store, directory-encoded'''
        return '%s/%s' % (self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed. Subdirectories are visited only if recurse is true.
        '''
        root = self.path
        if relpath:
            root += '/' + relpath
        found = []
        if not os.path.isdir(root):
            return found
        # number of leading characters (store path plus '/') to cut off
        striplen = len(self.path) + 1
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''returns 'requires' followed by the names in _data'''
        return ['requires'] + _data.split()

    def write(self):
        '''does nothing in this class'''
        pass
300 302
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory, using encodefilename'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        The unencoded name is None if the name on disk cannot be decoded.
        '''
        for diskname, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(diskname)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''returns the path of f below the store, filename-encoded'''
        return '%s/%s' % (self.path, encodefilename(f))

    def copylist(self):
        '''returns the names to copy, relative to the store's parent'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
323 325
class fncache(object):
    '''set of file names, kept in the 'fncache' file

    The entries are loaded lazily from the file on first access. Changes
    are only kept in memory until write() or rewrite() is called.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        # opener is called as opener('fncache', mode=...) to access the file
        self.opener = opener
        # None until the entries have been loaded (or rewritten)
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set of entries. Raises util.Abort
        if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # make sure the file is closed even if reading fails or an
        # invalid entry makes us abort
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # read the file again to report the offending line number
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one per line, to the fncache file'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        # NOTE(review): deliberately no try/finally here; closing an
        # atomictemp file after a failed write may publish a partial
        # file - confirm against the opener implementation
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file contents and the entries by files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries to the file if they have been changed'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, loading them first if necessary'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
381 383
class _fncacheopener(scmutil.abstractopener):
    '''opener that encodes paths and records written data/ files

    Calls are forwarded to the wrapped opener with the path passed
    through encode. Paths starting with 'data/' that are opened in a mode
    other than 'r' or 'rb' are added to the fncache first.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is stored on the wrapped opener

    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
400 402
def _plainhybridencode(f):
    '''hybrid-encode f without encoding a leading period or space'''
    return _hybridencode(f, dotencode=False)
403 405
def _dothybridencode(f):
    '''hybrid-encode f, also encoding a leading period or space'''
    return _hybridencode(f, dotencode=True)
406 408
407 409 class fncachestore(basicstore):
408 410 def __init__(self, path, openertype, dotencode):
409 411 if dotencode:
410 412 encode = _dothybridencode
411 413 else:
412 414 encode = _plainhybridencode
413 415 self.encode = encode
414 416 self.path = path + '/store'
415 417 self.pathsep = self.path + '/'
416 418 self.createmode = _calcmode(self.path)
417 419 op = openertype(self.path)
418 420 op.createmode = self.createmode
419 421 fnc = fncache(op)
420 422 self.fncache = fnc
421 423 self.opener = _fncacheopener(op, fnc, encode)
422 424
423 425 def join(self, f):
424 426 return self.pathsep + self.encode(f)
425 427
426 428 def getsize(self, path):
427 429 return os.stat(self.pathsep + path).st_size
428 430
429 431 def datafiles(self):
430 432 rewrite = False
431 433 existing = []
432 434 for f in sorted(self.fncache):
433 435 ef = self.encode(f)
434 436 try:
435 437 yield f, ef, self.getsize(ef)
436 438 existing.append(f)
437 439 except OSError, err:
438 440 if err.errno != errno.ENOENT:
439 441 raise
440 442 # nonexistent entry
441 443 rewrite = True
442 444 if rewrite:
443 445 # rewrite fncache to remove nonexistent entries
444 446 # (may be caused by rollback / strip)
445 447 self.fncache.rewrite(existing)
446 448
447 449 def copylist(self):
448 450 d = ('data dh fncache phaseroots obsstore'
449 451 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
450 452 return (['requires', '00changelog.i'] +
451 453 ['store/' + f for f in d.split()])
452 454
453 455 def write(self):
454 456 self.fncache.write()
455 457
def store(requirements, path, openertype):
    '''Return the store object matching the given requirements.

    Without 'store' a basicstore is returned, with 'store' but without
    'fncache' an encodedstore, and otherwise a fncachestore (which is
    told whether 'dotencode' is required).
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)
    return fncachestore(path, openertype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now