##// END OF EJS Templates
store: sort the results of fncachestore.datafiles()
Bryan O'Sullivan -
r17373:4cbb1137 stable
parent child Browse files
Show More
@@ -1,430 +1,430 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
# A file named foo and a directory named foo.i or foo.d would otherwise
# collide in the store, so such directory names get an extra '.hg' suffix.
def encodedir(path):
    '''escape directory names ending in .i, .d or .hg below 'data/'

    Paths that do not start with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if path.startswith('data/'):
        # '.hg/' has to be handled first: otherwise the '.hg' suffixes
        # appended for '.i/' and '.d/' would be escaped a second time
        for ending in ('.hg/', '.i/', '.d/'):
            path = path.replace(ending, ending[:-1] + '.hg/')
    return path
31 31
def decodedir(path):
    '''undo the directory escaping applied to 'data/' paths

    Paths that do not start with 'data/' or contain no '.hg/' are
    returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if path.startswith('data/') and ".hg/" in path:
        # '.hg.hg/' is unescaped last so that a literal '.i.hg/' or
        # '.d.hg/' directory name is not mistaken for an escaped one
        unescape = (
            (".d.hg/", ".d/"),
            (".i.hg/", ".i/"),
            (".hg.hg/", ".hg/"),
        )
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
47 47
def _buildencodefun():
    '''build the reversible filename encoder and the matching decoder

    Returns an (encode, decode) pair of functions. Uppercase letters and
    the underscore are prefixed with '_', control characters, characters
    from 126 upwards and the Windows reserved characters become '~xx'
    (two digit hex code). Directory escaping (encodedir/decodedir) is
    applied on top.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    tildecodes = list(range(32)) + list(range(126, 256))
    tildecodes.extend([ord(c) for c in '\\:*?"<>|'])
    capitalcodes = list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]

    # character -> encoded token; later loops override the identity mapping
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    for code in tildecodes:
        cmap[chr(code)] = "~%02x" % code
    for code in capitalcodes:
        cmap[chr(code)] = escape + chr(code).lower()

    # encoded token -> character
    dmap = dict([(token, char) for char, token in cmap.items()])

    def encode(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decode(s):
        chars = []
        pos = 0
        end = len(s)
        while pos < end:
            # tokens are 1 ('a'), 2 ('_a') or 3 ('~3a') characters long;
            # the shortest match wins
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    chars.append(dmap[token])
                    pos += width
                    break
            else:
                # nothing at this position is a valid token
                raise KeyError
        return decodedir("".join(chars))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
98 98
99 99 def _buildlowerencodefun():
100 100 '''
101 101 >>> f = _buildlowerencodefun()
102 102 >>> f('nothing/special.txt')
103 103 'nothing/special.txt'
104 104 >>> f('HELLO')
105 105 'hello'
106 106 >>> f('hello:world?')
107 107 'hello~3aworld~3f'
108 108 >>> f('the\x07quick\xADshot')
109 109 'the~07quick~adshot'
110 110 '''
111 111 winreserved = [ord(x) for x in '\\:*?"<>|']
112 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
113 113 for x in (range(32) + range(126, 256) + winreserved):
114 114 cmap[chr(x)] = "~%02x" % x
115 115 for x in range(ord("A"), ord("Z")+1):
116 116 cmap[chr(x)] = chr(x).lower()
117 117 return lambda s: "".join([cmap[c] for c in s])
118 118
119 119 lowerencode = _buildlowerencodefun()
120 120
121 121 _winreservednames = '''con prn aux nul
122 122 com1 com2 com3 com4 com5 com6 com7 com8 com9
123 123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
124 124 def _auxencode(path, dotencode):
125 125 '''
126 126 Encodes filenames containing names reserved by Windows or which end in
127 127 period or space. Does not touch other single reserved characters c.
128 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 129 Additionally encodes space or period at the beginning, if dotencode is
130 130 True.
131 131 path is assumed to be all lowercase.
132 132
133 133 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
134 134 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
135 135 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/foo.', False)
136 136 '.com1com2/lp~749.lpt4.lpt1/conprn/foo~2e'
137 137 >>> _auxencode('foo. ', True)
138 138 'foo.~20'
139 139 >>> _auxencode(' .foo', True)
140 140 '~20.foo'
141 141 '''
142 142 res = []
143 143 for n in path.split('/'):
144 144 if n:
145 145 base = n.split('.')[0]
146 146 if base and (base in _winreservednames):
147 147 # encode third letter ('aux' -> 'au~78')
148 148 ec = "~%02x" % ord(n[2])
149 149 n = n[0:2] + ec + n[3:]
150 150 if n[-1] in '. ':
151 151 # encode last period or space ('foo...' -> 'foo..~2e')
152 152 n = n[:-1] + "~%02x" % ord(n[-1])
153 153 if dotencode and n[0] in '. ':
154 154 n = "~%02x" % ord(n[0]) + n[1:]
155 155 res.append(n)
156 156 return '/'.join(res)
157 157
158 158 _maxstorepathlen = 120
159 159 _dirprefixlen = 8
160 160 _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
161 161 def _hybridencode(path, auxencode):
162 162 '''encodes path with a length limit
163 163
164 164 Encodes all paths that begin with 'data/', according to the following.
165 165
166 166 Default encoding (reversible):
167 167
168 168 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
169 169 characters are encoded as '~xx', where xx is the two digit hex code
170 170 of the character (see encodefilename).
171 171 Relevant path components consisting of Windows reserved filenames are
172 172 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
173 173
174 174 Hashed encoding (not reversible):
175 175
176 176 If the default-encoded path is longer than _maxstorepathlen, a
177 177 non-reversible hybrid hashing of the path is done instead.
178 178 This encoding uses up to _dirprefixlen characters of all directory
179 179 levels of the lowerencoded path, but not more levels than can fit into
180 180 _maxshortdirslen.
181 181 Then follows the filler followed by the sha digest of the full path.
182 182 The filler is the beginning of the basename of the lowerencoded path
183 183 (the basename is everything after the last path separator). The filler
184 184 is as long as possible, filling in characters from the basename until
185 185 the encoded path has _maxstorepathlen characters (or all chars of the
186 186 basename have been taken).
187 187 The extension (e.g. '.i' or '.d') is preserved.
188 188
189 189 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
190 190 encoding was used.
191 191 '''
192 192 if not path.startswith('data/'):
193 193 return path
194 194 # escape directories ending with .i and .d
195 195 path = encodedir(path)
196 196 ndpath = path[len('data/'):]
197 197 res = 'data/' + auxencode(encodefilename(ndpath))
198 198 if len(res) > _maxstorepathlen:
199 199 digest = _sha(path).hexdigest()
200 200 aep = auxencode(lowerencode(ndpath))
201 201 _root, ext = os.path.splitext(aep)
202 202 parts = aep.split('/')
203 203 basename = parts[-1]
204 204 sdirs = []
205 205 for p in parts[:-1]:
206 206 d = p[:_dirprefixlen]
207 207 if d[-1] in '. ':
208 208 # Windows can't access dirs ending in period or space
209 209 d = d[:-1] + '_'
210 210 t = '/'.join(sdirs) + '/' + d
211 211 if len(t) > _maxshortdirslen:
212 212 break
213 213 sdirs.append(d)
214 214 dirs = '/'.join(sdirs)
215 215 if len(dirs) > 0:
216 216 dirs += '/'
217 217 res = 'dh/' + dirs + digest + ext
218 218 spaceleft = _maxstorepathlen - len(res)
219 219 if spaceleft > 0:
220 220 filler = basename[:spaceleft]
221 221 res = 'dh/' + dirs + filler + digest + ext
222 222 return res
223 223
224 224 def _calcmode(path):
225 225 try:
226 226 # files in .hg/ will be created using this mode
227 227 mode = os.stat(path).st_mode
228 228 # avoid some useless chmods
229 229 if (0777 & ~util.umask) == (0777 & mode):
230 230 mode = None
231 231 except OSError:
232 232 mode = None
233 233 return mode
234 234
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # only directory names get escaped in this store layout
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the path of store file f below self.path'''
        return '/'.join((self.path, encodedir(f)))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are visited only if recurse is true.
        The list is empty when the directory does not exist.
        '''
        top = self.path
        if relpath:
            top = top + '/' + relpath
        if not os.path.isdir(top):
            return []
        # reported names are relative to self.path
        striplen = len(self.path) + 1
        found = []
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFDIR:
                    if recurse:
                        pending.append(full)
                elif kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below the 'data' directory'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''return the names of the files and directories making up the store'''
        names = ['requires']
        names.extend(_data.split())
        return names

    def write(self):
        '''nothing to write back for this store type'''
        pass
288 288
class encodedstore(basicstore):
    '''store kept in <path>/store whose file names pass through
    encodefilename'''
    def __init__(self, path, openertype):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        unencoded is None for files whose name cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                # not a name decodefilename understands
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the path of store file f below self.path'''
        return '/'.join((self.path, encodefilename(f)))

    def copylist(self):
        '''return the names of the files and directories making up the store'''
        names = ['requires', '00changelog.i']
        names.extend(['store/' + f for f in _data.split()])
        return names
311 311
class fncache(object):
    '''set of store file names persisted in the 'fncache' file

    The file is read lazily, on the first add, membership test or
    iteration. opener is the callable used to open the 'fncache' file.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None until the fncache file has been read
        self.entries = None
        # True once entries has changes not yet written to disk
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened is treated as empty. Raises
        util.Abort if the file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # close the file on every path, including the abort below
        try:
            self.entries = set(map(decodedir, fp.read().splitlines()))
            if '' in self.entries:
                # reread the file to report the number of the bad line
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one encoded name per line, to the fncache file'''
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write('\n'.join(map(encodedir, files)) + '\n')
        # NOTE(review): deliberately not closed in a finally block; with
        # atomictemp, close() presumably moves the temporary file into
        # place, which must not happen after a failed write - confirm
        # against the opener implementation
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file contents and the loaded entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries back if they changed since they were loaded'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
369 369
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that records 'data/' paths in the fncache

    op is the wrapped opener, fnc the fncache to update and encode the
    function turning a store path into the name handed to op.
    '''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # every mode other than a plain read gets the path recorded
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
380 380
class fncachestore(basicstore):
    '''store kept in <path>/store that tracks its data files in an fncache

    encode is the function turning a store path into its on-disk name.
    '''
    def __init__(self, path, openertype, encode):
        self.encode = encode
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        op = openertype(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc
        # paths opened through this opener are recorded in the fncache
        self.opener = _fncacheopener(op, fnc, encode)

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.path + '/' + self.encode(f)

    def getsize(self, path):
        '''return the size of the (already encoded) store file path

        Raises OSError if the file cannot be stat'ed.
        '''
        return os.stat(self.path + '/' + path).st_size

    def datafiles(self):
        '''yields (unencoded, encoded, size) in sorted order of the
        unencoded name

        Entries whose file cannot be stat'ed are skipped; if there were
        any, the fncache is rewritten without them once the iteration has
        run to completion.
        '''
        rewrite = False
        existing = []
        # iterate a sorted copy: the fncache entries are a set, so plain
        # iteration would give no defined order
        for f in sorted(self.fncache):
            ef = self.encode(f)
            # only the stat call is guarded, so that an OSError from
            # elsewhere is never mistaken for a missing file
            try:
                size = self.getsize(ef)
            except OSError:
                # nonexistent entry
                rewrite = True
                continue
            existing.append(f)
            yield f, ef, size
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''return the names of the files and directories making up the store'''
        d = ('data dh fncache phaseroots obsstore'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        return (['requires', '00changelog.i'] +
                ['store/' + f for f in d.split()])

    def write(self):
        '''write the fncache back to disk if it has changed'''
        self.fncache.write()
422 422
def store(requirements, path, openertype):
    '''return the store object matching the repository requirements

    Without 'store' a basicstore is used, with 'store' alone an
    encodedstore, and with 'store' and 'fncache' a fncachestore whose
    names are hybrid-encoded ('dotencode' additionally enables encoding
    of a leading period or space).
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)

    def auxencode(f):
        # requirements is consulted on every call, not captured up front
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, openertype, encode)
General Comments 0
You need to be logged in to leave comments. Login now