##// END OF EJS Templates
store: sort filenames in place
Bryan O'Sullivan -
r17054:125ff565 default
parent child Browse files
Show More
@@ -1,427 +1,428 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
# A file named foo and a directory named foo.i or foo.d would otherwise
# collide in the store, so such directory names get an extra '.hg' suffix.
def encodedir(path):
    '''Escape directory names below data/ that end in .hg, .i or .d.

    Paths that do not start with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if not path.startswith('data/'):
        return path
    # '.hg/' must be handled first: the '.hg' suffixes added for '.i/' and
    # '.d/' afterwards must not be escaped a second time
    for ending in ('.hg/', '.i/', '.d/'):
        path = path.replace(ending, ending[:-1] + '.hg/')
    return path
31 31
def decodedir(path):
    '''Undo the directory escaping done by encodedir.

    Paths that do not start with 'data/', or that contain no '.hg/' at
    all, are returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if not path.startswith('data/'):
        return path
    if ".hg/" not in path:
        # nothing in this path was escaped
        return path
    # reverse order of encodedir: '.hg.hg/' is unescaped last
    unescapes = (
        (".d.hg/", ".d/"),
        (".i.hg/", ".i/"),
        (".hg.hg/", ".hg/"),
    )
    for escaped, plain in unescapes:
        path = path.replace(escaped, plain)
    return path
47 47
48 48 def _buildencodefun():
49 49 '''
50 50 >>> enc, dec = _buildencodefun()
51 51
52 52 >>> enc('nothing/special.txt')
53 53 'nothing/special.txt'
54 54 >>> dec('nothing/special.txt')
55 55 'nothing/special.txt'
56 56
57 57 >>> enc('HELLO')
58 58 '_h_e_l_l_o'
59 59 >>> dec('_h_e_l_l_o')
60 60 'HELLO'
61 61
62 62 >>> enc('hello:world?')
63 63 'hello~3aworld~3f'
64 64 >>> dec('hello~3aworld~3f')
65 65 'hello:world?'
66 66
67 67 >>> enc('the\x07quick\xADshot')
68 68 'the~07quick~adshot'
69 69 >>> dec('the~07quick~adshot')
70 70 'the\\x07quick\\xadshot'
71 71 '''
72 72 e = '_'
73 73 winreserved = [ord(x) for x in '\\:*?"<>|']
74 74 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
75 75 for x in (range(32) + range(126, 256) + winreserved):
76 76 cmap[chr(x)] = "~%02x" % x
77 77 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
78 78 cmap[chr(x)] = e + chr(x).lower()
79 79 dmap = {}
80 80 for k, v in cmap.iteritems():
81 81 dmap[v] = k
82 82 def decode(s):
83 83 i = 0
84 84 while i < len(s):
85 85 for l in xrange(1, 4):
86 86 try:
87 87 yield dmap[s[i:i + l]]
88 88 i += l
89 89 break
90 90 except KeyError:
91 91 pass
92 92 else:
93 93 raise KeyError
94 94 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
95 95 lambda s: decodedir("".join(list(decode(s)))))
96 96
97 97 encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 121 _winreservednames = '''con prn aux nul
122 122 com1 com2 com3 com4 com5 com6 com7 com8 com9
123 123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 130 True.
131 131 path is assumed to be all lowercase.
132 132
133 133 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
134 134 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
135 135 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/foo.', False)
136 136 '.com1com2/lp~749.lpt4.lpt1/conprn/foo~2e'
137 137 >>> _auxencode('foo. ', True)
138 138 'foo.~20'
139 139 >>> _auxencode(' .foo', True)
140 140 '~20.foo'
141 141 '''
142 142 res = []
143 143 for n in path.split('/'):
144 144 if n:
145 145 base = n.split('.')[0]
146 146 if base and (base in _winreservednames):
147 147 # encode third letter ('aux' -> 'au~78')
148 148 ec = "~%02x" % ord(n[2])
149 149 n = n[0:2] + ec + n[3:]
150 150 if n[-1] in '. ':
151 151 # encode last period or space ('foo...' -> 'foo..~2e')
152 152 n = n[:-1] + "~%02x" % ord(n[-1])
153 153 if dotencode and n[0] in '. ':
154 154 n = "~%02x" % ord(n[0]) + n[1:]
155 155 res.append(n)
156 156 return '/'.join(res)
157 157
158 158 _maxstorepathlen = 120
159 159 _dirprefixlen = 8
160 160 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
161 161 def _hybridencode(path, auxencode):
162 162 '''encodes path with a length limit
163 163
164 164 Encodes all paths that begin with 'data/', according to the following.
165 165
166 166 Default encoding (reversible):
167 167
168 168 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
169 169 characters are encoded as '~xx', where xx is the two digit hex code
170 170 of the character (see encodefilename).
171 171 Relevant path components consisting of Windows reserved filenames are
172 172 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
173 173
174 174 Hashed encoding (not reversible):
175 175
176 176 If the default-encoded path is longer than _maxstorepathlen, a
177 177 non-reversible hybrid hashing of the path is done instead.
178 178 This encoding uses up to _dirprefixlen characters of all directory
179 179 levels of the lowerencoded path, but not more levels than can fit into
180 180 _maxshortdirslen.
181 181 Then follows the filler followed by the sha digest of the full path.
182 182 The filler is the beginning of the basename of the lowerencoded path
183 183 (the basename is everything after the last path separator). The filler
184 184 is as long as possible, filling in characters from the basename until
185 185 the encoded path has _maxstorepathlen characters (or all chars of the
186 186 basename have been taken).
187 187 The extension (e.g. '.i' or '.d') is preserved.
188 188
189 189 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
190 190 encoding was used.
191 191 '''
192 192 if not path.startswith('data/'):
193 193 return path
194 194 # escape directories ending with .i and .d
195 195 path = encodedir(path)
196 196 ndpath = path[len('data/'):]
197 197 res = 'data/' + auxencode(encodefilename(ndpath))
198 198 if len(res) > _maxstorepathlen:
199 199 digest = _sha(path).hexdigest()
200 200 aep = auxencode(lowerencode(ndpath))
201 201 _root, ext = os.path.splitext(aep)
202 202 parts = aep.split('/')
203 203 basename = parts[-1]
204 204 sdirs = []
205 205 for p in parts[:-1]:
206 206 d = p[:_dirprefixlen]
207 207 if d[-1] in '. ':
208 208 # Windows can't access dirs ending in period or space
209 209 d = d[:-1] + '_'
210 210 t = '/'.join(sdirs) + '/' + d
211 211 if len(t) > _maxshortdirslen:
212 212 break
213 213 sdirs.append(d)
214 214 dirs = '/'.join(sdirs)
215 215 if len(dirs) > 0:
216 216 dirs += '/'
217 217 res = 'dh/' + dirs + digest + ext
218 218 spaceleft = _maxstorepathlen - len(res)
219 219 if spaceleft > 0:
220 220 filler = basename[:spaceleft]
221 221 res = 'dh/' + dirs + filler + digest + ext
222 222 return res
223 223
224 224 def _calcmode(path):
225 225 try:
226 226 # files in .hg/ will be created using this mode
227 227 mode = os.stat(path).st_mode
228 228 # avoid some useless chmods
229 229 if (0777 & ~util.umask) == (0777 & mode):
230 230 mode = None
231 231 except OSError:
232 232 mode = None
233 233 return mode
234 234
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i phaseroots'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        op = openertype(self.path)
        op.createmode = self.createmode
        # every file name handed to the opener is dir-escaped first
        self.opener = scmutil.filteropener(op, encodedir)

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are visited only if recurse is true.
        The list is empty when the directory does not exist.
        '''
        path = self.path
        if relpath:
            path += '/' + relpath
        # entries are reported relative to the store root
        striplen = len(self.path) + 1
        l = []
        if os.path.isdir(path):
            visit = [path]
            while visit:
                p = visit.pop()
                for f, kind, st in osutil.listdir(p, stat=True):
                    fp = p + '/' + f
                    if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                        n = util.pconvert(fp[striplen:])
                        l.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        # sort in place rather than building a second list with sorted()
        l.sort()
        return l

    def datafiles(self):
        '''return the entries found below data/, recursively'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        # yield manifest before changelog
        for x in reversed(self._walk('', False)):
            yield x

    def copylist(self):
        '''return the list of names built from 'requires' and _data'''
        return ['requires'] + _data.split()

    def write(self):
        '''nothing to write for this store type'''
        pass
286 287
class encodedstore(basicstore):
    '''store below <path>/store whose file names go through encodefilename'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        The unencoded name is None for an entry whose name cannot be
        decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the path of store file f, with its name encoded'''
        return '/'.join([self.path, encodefilename(f)])

    def copylist(self):
        '''return 'requires', '00changelog.i' and the _data names in store/'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
309 310
class fncache(object):
    '''set of store file names, backed by the 'fncache' file

    The file is read lazily, the first time the entries are needed, and
    written back by write() only if an entry was added.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None until the fncache file has been loaded, then a set of names
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened (IOError) gives an empty set.
        Raises util.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # an empty entry is invalid; reread the file to find the
                # line number for the error message
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            # close the file on the abort path as well
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one dir-escaped name per line, to the fncache file'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace both the file content and the entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries to the file if any were added'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
367 368
class _fncacheopener(scmutil.abstractopener):
    '''opener that records written data/ files in the fncache

    Paths are passed through encode before reaching the wrapped opener.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # the unencoded path is what gets recorded
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
378 379
class fncachestore(basicstore):
    '''store below <path>/store that lists its data files in an fncache'''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        cache = fncache(rawopener)
        self.fncache = cache
        self.opener = _fncacheopener(rawopener, cache, encode)

    def join(self, f):
        '''return the path of store file f, with its name encoded'''
        return '/'.join([self.path, self.encode(f)])

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry on disk

        Entries whose file cannot be stat'ed are skipped; if there were
        any, the fncache is rewritten without them once the iteration
        has finished.
        '''
        present = []
        stale = False
        storepath = self.path
        for name in self.fncache:
            encoded = self.encode(name)
            try:
                st = os.stat(storepath + '/' + encoded)
            except OSError:
                # nonexistent entry
                stale = True
                continue
            yield name, encoded, st.st_size
            present.append(name)
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(present)

    def copylist(self):
        '''return 'requires', '00changelog.i' and the store/ names to copy'''
        d = ('data dh fncache phaseroots'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in d.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self):
        '''write out the fncache'''
        self.fncache.write()
419 420
def store(requirements, path, openertype):
    '''Return the store object matching the repository requirements.

    Without 'store' a basicstore is returned, with 'store' but without
    'fncache' an encodedstore, and with both a fncachestore whose
    encoding honours the 'dotencode' requirement.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # 'dotencode' is looked up on every call
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now