##// END OF EJS Templates
store: add some doctests
Adrian Buehlmann -
r13949:ba43aa1e default
parent child Browse files
Show More
@@ -1,354 +1,421 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''
    Escape directory names below 'data/' that end in '.i', '.d' or '.hg'.

    '.hg' is appended to every such directory name, so that a directory
    named foo.i or foo.d cannot collide with a file named foo.
    Paths that do not start with 'data/' are returned unchanged.

    >>> encodedir('data/foo.i')
    'data/foo.i'
    >>> encodedir('data/foo.i/bla.i')
    'data/foo.i.hg/bla.i'
    >>> encodedir('data/foo.i.hg/bla.i')
    'data/foo.i.hg.hg/bla.i'
    '''
    if path.startswith('data/'):
        # '.hg/' must be handled first: the other two replacements create
        # new '.hg/' sequences which must not be escaped a second time
        for suffix in ('.hg/', '.i/', '.d/'):
            path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
23 31
def decodedir(path):
    '''
    Undo the directory escaping done by encodedir.

    Paths that do not start with 'data/' or that contain no '.hg/' are
    returned unchanged.

    >>> decodedir('data/foo.i')
    'data/foo.i'
    >>> decodedir('data/foo.i.hg/bla.i')
    'data/foo.i/bla.i'
    >>> decodedir('data/foo.i.hg.hg/bla.i')
    'data/foo.i.hg/bla.i'
    '''
    if not (path.startswith('data/') and ".hg/" in path):
        return path
    # the escapes are removed in the opposite order of their creation
    unescape = (
        (".d.hg/", ".d/"),
        (".i.hg/", ".i/"),
        (".hg.hg/", ".hg/"),
    )
    for escaped, plain in unescape:
        path = path.replace(escaped, plain)
    return path
31 47
def _buildencodefun():
    '''
    Build the reversible filename encoding and return (encode, decode).

    Uppercase letters 'X' become '_x' and '_' becomes '__'. Control
    characters, characters with a code of 126 or above and the characters
    in '\\:*?"<>|' become '~xx', where xx is the two digit hex code of the
    character. encode also applies encodedir to its argument, decode
    applies decodedir to its result.

    decode raises KeyError if its argument is not a valid encoding.

    >>> enc, dec = _buildencodefun()

    >>> enc('nothing/special.txt')
    'nothing/special.txt'
    >>> dec('nothing/special.txt')
    'nothing/special.txt'

    >>> enc('HELLO')
    '_h_e_l_l_o'
    >>> dec('_h_e_l_l_o')
    'HELLO'

    >>> enc('hello:world?')
    'hello~3aworld~3f'
    >>> dec('hello~3aworld~3f')
    'hello:world?'

    >>> enc('the\x07quick\xADshot')
    'the~07quick~adshot'
    >>> dec('the~07quick~adshot')
    'the\\x07quick\\xadshot'
    '''
    escape = '_'

    # character -> encoded form; start with the identity for plain ASCII
    encmap = {}
    for code in range(127):
        encmap[chr(code)] = chr(code)
    tildecodes = list(range(32)) + list(range(126, 256))
    tildecodes.extend([ord(c) for c in '\\:*?"<>|'])
    for code in tildecodes:
        encmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        encmap[chr(code)] = escape + chr(code).lower()

    # encoded form -> character
    decmap = {}
    for plain, encoded in encmap.items():
        decmap[encoded] = plain

    def tokens(s):
        # an encoded character is 1 ('a'), 2 ('_a') or 3 ('~3a') chars long
        pos = 0
        end = len(s)
        while pos < end:
            for width in (1, 2, 3):
                chunk = s[pos:pos + width]
                if chunk in decmap:
                    yield decmap[chunk]
                    pos += width
                    break
            else:
                raise KeyError

    def encode(s):
        return "".join([encmap[c] for c in encodedir(s)])

    def decode(s):
        return decodedir("".join(list(tokens(s))))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
59 98
60 99 def _build_lower_encodefun():
100 '''
101 >>> f = _build_lower_encodefun()
102 >>> f('nothing/special.txt')
103 'nothing/special.txt'
104 >>> f('HELLO')
105 'hello'
106 >>> f('hello:world?')
107 'hello~3aworld~3f'
108 >>> f('the\x07quick\xADshot')
109 'the~07quick~adshot'
110 '''
61 111 win_reserved = [ord(x) for x in '\\:*?"<>|']
62 112 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
63 113 for x in (range(32) + range(126, 256) + win_reserved):
64 114 cmap[chr(x)] = "~%02x" % x
65 115 for x in range(ord("A"), ord("Z")+1):
66 116 cmap[chr(x)] = chr(x).lower()
67 117 return lambda s: "".join([cmap[c] for c in s])
68 118
69 119 lowerencode = _build_lower_encodefun()
70 120
71 121 _windows_reserved_filenames = '''con prn aux nul
72 122 com1 com2 com3 com4 com5 com6 com7 com8 com9
73 123 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
74 124 def _auxencode(path, dotencode):
125 '''
126 Encodes filenames containing names reserved by Windows or which end in
127 period or space. Does not touch other single reserved characters c.
128 Specifically, c in '\\:*?"<>|' or ord(c) <= 31 are *not* encoded here.
129 Additionally encodes space or period at the beginning, if dotencode is
130 True.
131 path is assumed to be all lowercase.
132
133 >>> _auxencode('.foo/aux.txt/txt.aux/con/prn/nul/foo.', True)
134 '~2efoo/au~78.txt/txt.aux/co~6e/pr~6e/nu~6c/foo~2e'
135 >>> _auxencode('.com1com2/lpt9.lpt4.lpt1/conprn/foo.', False)
136 '.com1com2/lp~749.lpt4.lpt1/conprn/foo~2e'
137 >>> _auxencode('foo. ', True)
138 'foo.~20'
139 >>> _auxencode(' .foo', True)
140 '~20.foo'
141 '''
75 142 res = []
76 143 for n in path.split('/'):
77 144 if n:
78 145 base = n.split('.')[0]
79 146 if base and (base in _windows_reserved_filenames):
80 147 # encode third letter ('aux' -> 'au~78')
81 148 ec = "~%02x" % ord(n[2])
82 149 n = n[0:2] + ec + n[3:]
83 150 if n[-1] in '. ':
84 151 # encode last period or space ('foo...' -> 'foo..~2e')
85 152 n = n[:-1] + "~%02x" % ord(n[-1])
86 153 if dotencode and n[0] in '. ':
87 154 n = "~%02x" % ord(n[0]) + n[1:]
88 155 res.append(n)
89 156 return '/'.join(res)
90 157
91 158 MAX_PATH_LEN_IN_HGSTORE = 120
92 159 DIR_PREFIX_LEN = 8
93 160 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
94 161 def _hybridencode(path, auxencode):
95 162 '''encodes path with a length limit
96 163
97 164 Encodes all paths that begin with 'data/', according to the following.
98 165
99 166 Default encoding (reversible):
100 167
101 168 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
102 169 characters are encoded as '~xx', where xx is the two digit hex code
103 170 of the character (see encodefilename).
104 171 Relevant path components consisting of Windows reserved filenames are
105 172 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
106 173
107 174 Hashed encoding (not reversible):
108 175
109 176 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
110 177 non-reversible hybrid hashing of the path is done instead.
111 178 This encoding uses up to DIR_PREFIX_LEN characters of all directory
112 179 levels of the lowerencoded path, but not more levels than can fit into
113 180 _MAX_SHORTENED_DIRS_LEN.
114 181 Then follows the filler followed by the sha digest of the full path.
115 182 The filler is the beginning of the basename of the lowerencoded path
116 183 (the basename is everything after the last path separator). The filler
117 184 is as long as possible, filling in characters from the basename until
118 185 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
119 186 of the basename have been taken).
120 187 The extension (e.g. '.i' or '.d') is preserved.
121 188
122 189 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
123 190 encoding was used.
124 191 '''
125 192 if not path.startswith('data/'):
126 193 return path
127 194 # escape directories ending with .i and .d
128 195 path = encodedir(path)
129 196 ndpath = path[len('data/'):]
130 197 res = 'data/' + auxencode(encodefilename(ndpath))
131 198 if len(res) > MAX_PATH_LEN_IN_HGSTORE:
132 199 digest = _sha(path).hexdigest()
133 200 aep = auxencode(lowerencode(ndpath))
134 201 _root, ext = os.path.splitext(aep)
135 202 parts = aep.split('/')
136 203 basename = parts[-1]
137 204 sdirs = []
138 205 for p in parts[:-1]:
139 206 d = p[:DIR_PREFIX_LEN]
140 207 if d[-1] in '. ':
141 208 # Windows can't access dirs ending in period or space
142 209 d = d[:-1] + '_'
143 210 t = '/'.join(sdirs) + '/' + d
144 211 if len(t) > _MAX_SHORTENED_DIRS_LEN:
145 212 break
146 213 sdirs.append(d)
147 214 dirs = '/'.join(sdirs)
148 215 if len(dirs) > 0:
149 216 dirs += '/'
150 217 res = 'dh/' + dirs + digest + ext
151 218 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
152 219 if space_left > 0:
153 220 filler = basename[:space_left]
154 221 res = 'dh/' + dirs + filler + digest + ext
155 222 return res
156 223
157 224 def _calcmode(path):
158 225 try:
159 226 # files in .hg/ will be created using this mode
160 227 mode = os.stat(path).st_mode
161 228 # avoid some useless chmods
162 229 if (0777 & ~util.umask) == (0777 & mode):
163 230 mode = None
164 231 except OSError:
165 232 mode = None
166 233 return mode
167 234
168 235 _data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
169 236
170 237 class basicstore(object):
171 238 '''base class for local repository stores'''
172 239 def __init__(self, path, opener):
173 240 self.path = path
174 241 self.createmode = _calcmode(path)
175 242 op = opener(self.path)
176 243 op.createmode = self.createmode
177 244 self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)
178 245
179 246 def join(self, f):
180 247 return self.path + '/' + encodedir(f)
181 248
182 249 def _walk(self, relpath, recurse):
183 250 '''yields (unencoded, encoded, size)'''
184 251 path = self.path
185 252 if relpath:
186 253 path += '/' + relpath
187 254 striplen = len(self.path) + 1
188 255 l = []
189 256 if os.path.isdir(path):
190 257 visit = [path]
191 258 while visit:
192 259 p = visit.pop()
193 260 for f, kind, st in osutil.listdir(p, stat=True):
194 261 fp = p + '/' + f
195 262 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
196 263 n = util.pconvert(fp[striplen:])
197 264 l.append((decodedir(n), n, st.st_size))
198 265 elif kind == stat.S_IFDIR and recurse:
199 266 visit.append(fp)
200 267 return sorted(l)
201 268
202 269 def datafiles(self):
203 270 return self._walk('data', True)
204 271
205 272 def walk(self):
206 273 '''yields (unencoded, encoded, size)'''
207 274 # yield data files first
208 275 for x in self.datafiles():
209 276 yield x
210 277 # yield manifest before changelog
211 278 for x in reversed(self._walk('', False)):
212 279 yield x
213 280
214 281 def copylist(self):
215 282 return ['requires'] + _data.split()
216 283
217 284 def write(self):
218 285 pass
219 286
class encodedstore(basicstore):
    '''store in path/store whose filenames go through encodefilename'''
    def __init__(self, path, opener):
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode
        def storeopener(f, *args, **kw):
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = storeopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for files whose name cannot be decoded.
        '''
        for unencoded, encoded, size in self._walk('data', True):
            try:
                unencoded = decodefilename(unencoded)
            except KeyError:
                # not a valid encoding
                unencoded = None
            yield unencoded, encoded, size

    def join(self, f):
        '''return the path of store file f, including the store path'''
        return self.path + '/' + encodefilename(f)

    def copylist(self):
        '''returns the list of names, store entries prefixed by store/'''
        storefiles = ['store/' + f for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
242 309
class fncache(object):
    '''set of store filenames, backed by the file 'fncache'

    The file holds one filename per line. It is read on first use and
    written back by write() if entries were added since.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None as long as the file has not been loaded
        self.entries = None
        self._dirty = False

    def _load(self):
        '''fill the entries from the fncache file

        A nonexistent file yields an empty set. Raises util.Abort if a
        line is empty or not terminated by a newline.
        '''
        self.entries = set()
        self._dirty = False
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                self.entries.add(decodedir(line[:-1]))
        finally:
            # close the file on invalid entries and read errors as well
            fp.close()

    def rewrite(self, files):
        '''replace the contents of the fncache file and the entries by files'''
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            # do not leak the file if a write fails
            fp.close()
        self.entries = set(files)
        self._dirty = False

    def write(self):
        '''write the entries to the fncache file, if any were added'''
        if not self._dirty:
            return
        fp = self.opener('fncache', mode='wb', atomictemp=True)
        for p in self.entries:
            fp.write(encodedir(p) + '\n')
        # NOTE(review): rename() is only reached if all writes succeeded;
        # presumably this is what puts the temporary file in place - confirm
        # against the opener implementation
        fp.rename()
        self._dirty = False

    def add(self, fn):
        '''add fn to the entries, marking the cache dirty if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            self._dirty = True
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
300 367
class fncachestore(basicstore):
    '''store in path/store that records its data files in an fncache

    Filenames are translated with the encode function passed in.
    '''
    def __init__(self, path, opener, encode):
        self.encode = encode
        self.path = path + '/store'
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode
        cache = fncache(rawopener)
        self.fncache = cache

        def fncacheopener(path, mode='r', *args, **kw):
            # remember every data file that is opened for anything but
            # reading
            readonly = mode in ('r', 'rb')
            if not readonly and path.startswith('data/'):
                cache.add(path)
            return rawopener(self.encode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the path of store file f, including the store path'''
        return self.path + '/' + self.encode(f)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the existing fncache entries

        If entries without a file were found, the fncache is rewritten
        without them once all entries have been visited.
        '''
        stale = False
        existing = []
        for f in self.fncache:
            ef = self.encode(f)
            try:
                st = os.stat(self.path + '/' + ef)
            except OSError:
                # nonexistent entry
                stale = True
            else:
                yield f, ef, st.st_size
                existing.append(f)
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''returns the list of names, store entries prefixed by store/'''
        d = ('data dh fncache'
             ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        storefiles = ['store/' + f for f in d.split()]
        return ['requires', '00changelog.i'] + storefiles

    def write(self):
        '''write the fncache, if necessary'''
        self.fncache.write()
346 413
def store(requirements, path, opener):
    '''return the store object matching the given requirements

    Without 'store' in requirements a basicstore is returned, with 'store'
    but without 'fncache' an encodedstore, and with both a fncachestore.
    For the latter, 'dotencode' in requirements additionally switches on
    the encoding of leading periods and spaces (see _auxencode).
    '''
    if 'store' not in requirements:
        return basicstore(path, opener)
    if 'fncache' not in requirements:
        return encodedstore(path, opener)

    def auxencode(f):
        # requirements is looked at on every call, not just once
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, opener, encode)
@@ -1,29 +1,32 b''
# this is a hack to make sure no escape characters are inserted into the
# output
import os
if 'TERM' in os.environ:
    del os.environ['TERM']
import doctest

def _testmod(name, optionflags=0):
    '''import the module called name and run its doctests'''
    mod = __import__(name)
    # __import__ returns the top-level package: walk down to the module
    for attr in name.split('.')[1:]:
        mod = getattr(mod, attr)
    doctest.testmod(mod, optionflags=optionflags)

_testmod('mercurial.changelog')
_testmod('mercurial.dagparser', optionflags=doctest.NORMALIZE_WHITESPACE)
_testmod('mercurial.match')
_testmod('mercurial.store')
_testmod('mercurial.url')
_testmod('mercurial.util')
_testmod('mercurial.encoding')
_testmod('mercurial.hgweb.hgwebdir_mod')
_testmod('hgext.convert.cvsps')
General Comments 0
You need to be logged in to leave comments. Login now