##// END OF EJS Templates
store: add multiline doctest case for encodedir()...
Adrian Buehlmann -
r17605:e9cc29be default
parent child Browse files
Show More
@@ -1,459 +1,461 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, scmutil, util
10 10 import os, stat, errno
11 11
# SHA-1 hash constructor; _hybridencode() uses it to digest store paths
# that are too long to keep under their reversible encoding.
_sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''
    Escape every path component that ends in '.hg', '.i' or '.d' and is
    followed by '/' by appending '.hg' to it. The input may contain
    several newline-separated paths; each one is escaped.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    >>> encodedir('data/foo.i\\ndata/foo.i/bla.i\\ndata/foo.i.hg/bla.i\\n')
    'data/foo.i\\ndata/foo.i.hg/bla.i\\ndata/foo.i.hg.hg/bla.i\\n'
    '''
    # '.hg' must be handled first: the two later substitutions introduce
    # new '.hg/' sequences which must not be escaped a second time.
    for suffix in ('.hg', '.i', '.d'):
        path = path.replace(suffix + '/', suffix + '.hg/')
    return path
29 31
def decodedir(path):
    '''
    Undo encodedir(): strip the '.hg' that was appended to directory
    names ending in '.d', '.i' or '.hg'.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    # fast path: without '.hg/' there is nothing that could be escaped
    if ".hg/" in path:
        # reverse order of encodedir(): '.hg.hg/' is collapsed last
        for suffix in ('.d', '.i', '.hg'):
            path = path.replace(suffix + '.hg/', suffix + '/')
    return path
45 47
def _buildencodefun():
    '''
    Build and return the (encode, decode) pair implementing the
    reversible filename encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']
    # start from the identity mapping, then override what needs escaping
    cmap = dict([(chr(code), chr(code)) for code in range(127)])
    # control chars, '~' and everything above it, Windows reserved chars
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(code)] = "~%02x" % code
    # uppercase letters and the escape char itself get an '_' prefix
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()
    # inverse table: encoded token -> original character
    dmap = dict([(token, char) for char, token in cmap.items()])

    def tokens(s):
        pos = 0
        while pos < len(s):
            # an encoded token is 1 ('a'), 2 ('_a') or 3 ('~3a') chars long
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # no token of any width matches at this position
                raise KeyError

    def encode(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decode(s):
        return decodedir("".join(tokens(s)))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
96 98
97 99 def _buildlowerencodefun():
98 100 '''
99 101 >>> f = _buildlowerencodefun()
100 102 >>> f('nothing/special.txt')
101 103 'nothing/special.txt'
102 104 >>> f('HELLO')
103 105 'hello'
104 106 >>> f('hello:world?')
105 107 'hello~3aworld~3f'
106 108 >>> f('the\x07quick\xADshot')
107 109 'the~07quick~adshot'
108 110 '''
109 111 winreserved = [ord(x) for x in '\\:*?"<>|']
110 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
111 113 for x in (range(32) + range(126, 256) + winreserved):
112 114 cmap[chr(x)] = "~%02x" % x
113 115 for x in range(ord("A"), ord("Z")+1):
114 116 cmap[chr(x)] = chr(x).lower()
115 117 return lambda s: "".join([cmap[c] for c in s])
116 118
117 119 lowerencode = _buildlowerencodefun()
118 120
119 121 # Windows reserved names: con, prn, aux, nul, com1..com9, lpt1..lpt9
120 122 _winres3 = ('aux', 'con', 'prn', 'nul') # length 3
121 123 _winres4 = ('com', 'lpt') # length 4 (with trailing 1..9)
122 124 def _auxencode(path, dotencode):
123 125 '''
124 126 Encodes filenames containing names reserved by Windows or which end in
125 127 period or space. Does not touch other single reserved characters c.
126 128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
127 129 Additionally encodes space or period at the beginning, if dotencode is
128 130 True. Parameter path is assumed to be all lowercase.
129 131 A segment only needs encoding if a reserved name appears as a
130 132 basename (e.g. "aux", "aux.foo"). A directory or file named "foo.aux"
131 133 doesn't need encoding.
132 134
133 135 >>> s = '.foo/aux.txt/txt.aux/con/prn/nul/foo.'
134 136 >>> _auxencode(s.split('/'), True)
135 137 ['~2efoo', 'au~78.txt', 'txt.aux', 'co~6e', 'pr~6e', 'nu~6c', 'foo~2e']
136 138 >>> s = '.com1com2/lpt9.lpt4.lpt1/conprn/com0/lpt0/foo.'
137 139 >>> _auxencode(s.split('/'), False)
138 140 ['.com1com2', 'lp~749.lpt4.lpt1', 'conprn', 'com0', 'lpt0', 'foo~2e']
139 141 >>> _auxencode(['foo. '], True)
140 142 ['foo.~20']
141 143 >>> _auxencode([' .foo'], True)
142 144 ['~20.foo']
143 145 '''
144 146 for i, n in enumerate(path):
145 147 if not n:
146 148 continue
147 149 if dotencode and n[0] in '. ':
148 150 n = "~%02x" % ord(n[0]) + n[1:]
149 151 path[i] = n
150 152 else:
151 153 l = n.find('.')
152 154 if l == -1:
153 155 l = len(n)
154 156 if ((l == 3 and n[:3] in _winres3) or
155 157 (l == 4 and n[3] <= '9' and n[3] >= '1'
156 158 and n[:3] in _winres4)):
157 159 # encode third letter ('aux' -> 'au~78')
158 160 ec = "~%02x" % ord(n[2])
159 161 n = n[0:2] + ec + n[3:]
160 162 path[i] = n
161 163 if n[-1] in '. ':
162 164 # encode last period or space ('foo...' -> 'foo..~2e')
163 165 path[i] = n[:-1] + "~%02x" % ord(n[-1])
164 166 return path
165 167
_maxstorepathlen = 120
_dirprefixlen = 8
_maxshortdirslen = 8 * (_dirprefixlen + 1) - 4
def _hybridencode(path, dotencode):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see
    _auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than _maxstorepathlen, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to _dirprefixlen characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _maxshortdirslen.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has _maxstorepathlen characters (or all chars of the
    basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    segments = encodefilename(path).split('/')
    default = '/'.join(_auxencode(segments, dotencode))
    if len(default) <= _maxstorepathlen:
        # the reversible encoding fits
        return default

    # hashed encoding
    path = encodedir(path)
    digest = _sha(path).hexdigest()
    # the first path component is dropped; 'dh' takes its place below
    parts = _auxencode(lowerencode(path).split('/')[1:], dotencode)
    basename = parts[-1]
    ext = os.path.splitext(basename)[1]
    shortdirs = []
    shortlen = 0
    for part in parts[:-1]:
        prefix = part[:_dirprefixlen]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        if shortlen == 0:
            total = len(prefix)
        else:
            # account for the '/' joining this level to the previous ones
            total = shortlen + 1 + len(prefix)
        if total > _maxshortdirslen:
            break
        shortdirs.append(prefix)
        shortlen = total
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    hashed = 'dh/' + dirs + digest + ext
    spaceleft = _maxstorepathlen - len(hashed)
    if spaceleft > 0:
        # use the remaining room for the start of the basename
        hashed = 'dh/' + dirs + basename[:spaceleft] + digest + ext
    return hashed
233 235
234 236 def _calcmode(path):
235 237 try:
236 238 # files in .hg/ will be created using this mode
237 239 mode = os.stat(path).st_mode
238 240 # avoid some useless chmods
239 241 if (0777 & ~util.umask) == (0777 & mode):
240 242 mode = None
241 243 except OSError:
242 244 mode = None
243 245 return mode
244 246
_data = ('data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
         ' phaseroots obsstore')

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, openertype):
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = openertype(self.path)
        rawopener.createmode = self.createmode
        # every name passed to the opener goes through encodedir() first
        self.opener = scmutil.filteropener(rawopener, encodedir)

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodedir(f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) for the
        '.d' and '.i' files found below relpath'''
        root = self.path
        if relpath:
            root += '/' + relpath
        striplen = len(self.path) + 1
        found = []
        if not os.path.isdir(root):
            return found
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = current + '/' + name
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    # path relative to the store root
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below 'data' (recursively)'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return the list of names making up this store'''
        return ['requires'] + _data.split()

    def write(self):
        '''nothing to flush for this store type'''
        pass
298 300
class encodedstore(basicstore):
    '''store kept in the 'store' subdirectory, with file names passed
    through encodefilename()'''
    def __init__(self, path, openertype):
        storepath = path + '/store'
        self.path = storepath
        self.createmode = _calcmode(storepath)
        rawopener = openertype(storepath)
        rawopener.createmode = self.createmode
        self.opener = scmutil.filteropener(rawopener, encodefilename)

    def datafiles(self):
        '''yields (unencoded, encoded, size); unencoded is None when the
        on-disk name cannot be decoded'''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                # decodefilename() found no valid token
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''return the list of names making up this store'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
321 323
class fncache(object):
    '''set of file names backed by the 'fncache' file

    The file is read lazily on first use; entries is None until then.
    _dirty records whether entries were added since the last load/write.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A missing file yields an empty set. Raises util.Abort if the
        file contains an empty line.
        '''
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = set()
            return
        # make sure the file is closed even when we abort below
        try:
            self.entries = set(decodedir(fp.read()).splitlines())
            if '' in self.entries:
                # re-read line by line to report where the empty entry is
                fp.seek(0)
                for n, line in enumerate(fp):
                    if not line.rstrip('\n'):
                        t = _('invalid entry in fncache, line %s') % (n + 1)
                        raise util.Abort(t)
        finally:
            fp.close()

    def _write(self, files, atomictemp):
        '''write files, one per line, to the fncache file

        atomictemp is handed to the opener unchanged.
        '''
        # deliberately no try/finally here: closing after a failed write
        # could publish a partial file when atomictemp is set
        fp = self.opener('fncache', mode='wb', atomictemp=atomictemp)
        if files:
            fp.write(encodedir('\n'.join(files) + '\n'))
        fp.close()
        self._dirty = False

    def rewrite(self, files):
        '''replace the file and the in-memory entries with files'''
        self._write(files, False)
        self.entries = set(files)

    def write(self):
        '''write the entries out if any were added'''
        if self._dirty:
            self._write(self.entries, True)

    def add(self, fn):
        '''add fn to the entries, loading the file first if needed'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
379 381
class _fncacheopener(scmutil.abstractopener):
    '''opener wrapper that encodes paths and records files opened for
    writing below 'data/' in the fncache'''
    def __init__(self, op, fnc, encode):
        self.opener = op
        self.fncache = fnc
        self.encode = encode

    # mustaudit is forwarded to the wrapped opener
    def _getmustaudit(self):
        return self.opener.mustaudit

    def _setmustaudit(self, onoff):
        self.opener.mustaudit = onoff

    mustaudit = property(_getmustaudit, _setmustaudit)

    def __call__(self, path, mode='r', *args, **kw):
        readonly = mode in ('r', 'rb')
        if not readonly and path.startswith('data/'):
            # remember the unencoded name
            self.fncache.add(path)
        encoded = self.encode(path)
        return self.opener(encoded, mode, *args, **kw)
398 400
def _plainhybridencode(f):
    '''_hybridencode() with dotencode switched off'''
    return _hybridencode(f, dotencode=False)
401 403
def _dothybridencode(f):
    '''_hybridencode() with dotencode switched on'''
    return _hybridencode(f, dotencode=True)
404 406
405 407 class fncachestore(basicstore):
406 408 def __init__(self, path, openertype, dotencode):
407 409 if dotencode:
408 410 encode = _dothybridencode
409 411 else:
410 412 encode = _plainhybridencode
411 413 self.encode = encode
412 414 self.path = path + '/store'
413 415 self.pathsep = self.path + '/'
414 416 self.createmode = _calcmode(self.path)
415 417 op = openertype(self.path)
416 418 op.createmode = self.createmode
417 419 fnc = fncache(op)
418 420 self.fncache = fnc
419 421 self.opener = _fncacheopener(op, fnc, encode)
420 422
421 423 def join(self, f):
422 424 return self.pathsep + self.encode(f)
423 425
424 426 def getsize(self, path):
425 427 return os.stat(self.pathsep + path).st_size
426 428
427 429 def datafiles(self):
428 430 rewrite = False
429 431 existing = []
430 432 for f in sorted(self.fncache):
431 433 ef = self.encode(f)
432 434 try:
433 435 yield f, ef, self.getsize(ef)
434 436 existing.append(f)
435 437 except OSError, err:
436 438 if err.errno != errno.ENOENT:
437 439 raise
438 440 # nonexistent entry
439 441 rewrite = True
440 442 if rewrite:
441 443 # rewrite fncache to remove nonexistent entries
442 444 # (may be caused by rollback / strip)
443 445 self.fncache.rewrite(existing)
444 446
445 447 def copylist(self):
446 448 d = ('data dh fncache phaseroots obsstore'
447 449 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
448 450 return (['requires', '00changelog.i'] +
449 451 ['store/' + f for f in d.split()])
450 452
451 453 def write(self):
452 454 self.fncache.write()
453 455
def store(requirements, path, openertype):
    '''return the store object matching the repository requirements

    No 'store' requirement selects basicstore; 'store' without 'fncache'
    selects encodedstore; otherwise fncachestore is used, with dotencode
    enabled when 'dotencode' is required.
    '''
    if 'store' not in requirements:
        return basicstore(path, openertype)
    if 'fncache' not in requirements:
        return encodedstore(path, openertype)
    return fncachestore(path, openertype, 'dotencode' in requirements)
General Comments 0
You need to be logged in to leave comments. Login now