##// END OF EJS Templates
store: refactor the fncache handling...
Benoit Boissinot -
r8530:03196ac9 default
parent child Browse files
Show More
@@ -1,294 +1,309 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from i18n import _
9 9 import osutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
def _buildencodefun():
    '''build the reversible filename codec used by the store

    Returns an (encode, decode) pair of string functions.

    encode maps each character as follows:
    - codes below 32, codes from 126 upwards and the Windows reserved
      characters become '~xx', xx being the two digit hex code
    - 'A'..'Z' and '_' become '_' followed by the lowercased character
    - any other character is kept unchanged

    decode is the inverse of encode; it raises KeyError when given a
    string that encode cannot have produced.
    '''
    escape = '_'
    reserved = [ord(c) for c in '\\:*?"<>|']

    # start from the identity mapping for the 7-bit range ...
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    # ... then override everything that needs the '~xx' form ...
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        cmap[chr(code)] = "~%02x" % code
    # ... and finally the characters that get the '_' escape
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()

    # reverse table: encoded token (1 to 3 characters long) -> character
    dmap = dict([(token, char) for char, token in cmap.items()])

    def decodetokens(s):
        pos = 0
        end = len(s)
        while pos < end:
            # try the shortest token first, then the longer ones
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # nothing starting at pos is a known token
                raise KeyError

    def encode(s):
        return "".join([cmap[c] for c in s])

    def decode(s):
        return "".join(decodetokens(s))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
41 41
def _build_lower_encodefun():
    '''build the one-way, lowercasing filename encoder

    The returned function maps each character of its argument:
    - codes below 32, codes from 126 upwards and the Windows reserved
      characters become '~xx', xx being the two digit hex code
    - 'A'..'Z' become the corresponding lowercase letter
    - any other character is kept unchanged
    '''
    reserved = [ord(c) for c in '\\:*?"<>|']

    table = {}
    for code in range(127):
        table[chr(code)] = chr(code)
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        table[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def lowerenc(s):
        return "".join([table[c] for c in s])

    return lowerenc

lowerencode = _build_lower_encodefun()
52 52
_windows_reserved_filenames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''mask path components that Windows cannot handle

    Each non-empty, '/'-separated component of path is rewritten when:
    - the part before its first '.' is a Windows reserved filename; the
      third character is then replaced by '~xx' ('aux' -> 'au~78')
    - it ends with a period or a space; that last character is then
      replaced by '~xx' ('foo...' -> 'foo..~2e')

    Empty components are dropped from the result.
    '''
    encoded = []
    for name in path.split('/'):
        if not name:
            continue
        stem = name.split('.')[0]
        if stem in _windows_reserved_filenames:
            # every reserved name has at least three characters
            name = name[:2] + "~%02x" % ord(name[2]) + name[3:]
        last = name[-1]
        if last == '.' or last == ' ':
            name = name[:-1] + "~%02x" % ord(last)
        encoded.append(name)
    return '/'.join(encoded)
70 70
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encode a store path, keeping the result within a length limit

    Only paths starting with 'data/' are encoded; any other path is
    returned unchanged.

    Default encoding (reversible):

    The part after 'data/' goes through encodefilename (uppercase letter
    'X' -> '_x', reserved or illegal characters -> '~xx') and then
    through auxencode (Windows reserved names and trailing periods or
    spaces are masked). The result keeps its 'data/' prefix.

    Hashed encoding (not reversible):

    Used when the default encoding is longer than MAX_PATH_LEN_IN_HGSTORE.
    The result is made of, in this order:
    - 'dh/' instead of 'data/'
    - the first DIR_PREFIX_LEN characters of each directory level of the
      lowerencoded path, for as many leading levels as fit into
      _MAX_SHORTENED_DIRS_LEN
    - the filler: as many leading characters of the lowerencoded
      basename (everything after the last path separator) as fit without
      exceeding MAX_PATH_LEN_IN_HGSTORE
    - the sha digest of the full, unencoded path
    - the extension of the path (e.g. '.i' or '.d')
    '''
    prefix = 'data/'
    if not path.startswith(prefix):
        return path
    tail = path[len(prefix):]
    default = prefix + auxencode(encodefilename(tail))
    if len(default) <= MAX_PATH_LEN_IN_HGSTORE:
        return default

    # too long for the reversible form: build the hashed one
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components[-1]
    shortdirs = []
    for comp in components[:-1]:
        short = comp[:DIR_PREFIX_LEN]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        # the budget is checked against the joined levels plus a '/'
        candidate = '/'.join(shortdirs) + '/' + short
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        shortdirs.append(short)
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    hashed = 'dh/' + dirs + digest + ext
    room = MAX_PATH_LEN_IN_HGSTORE - len(hashed)
    if room > 0:
        # spend the remaining room on the start of the basename
        hashed = 'dh/' + dirs + basename[:room] + digest + ext
    return hashed
134 134
def _calcmode(path):
    '''return the mode to create files with under path, or None

    None is returned when path cannot be stat'ed, or when its permission
    bits already match what the current umask produces, so that callers
    can skip a useless chmod.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # the nine rwx permission bits (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    if (permbits & ~util.umask) == (permbits & mode):
        # avoid some useless chmods
        return None
    return mode
145 145
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore:
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        mode = _calcmode(path)
        storeopener = opener(path)
        storeopener.createmode = mode
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = mode
        self.opener = storeopener

    def join(self, f):
        '''return the path of f inside the store'''
        return self.pathjoiner(self.path, f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size)

        Lists the regular files ending in '.d' or '.i' found in the
        relpath directory of the store, descending into subdirectories
        when recurse is true. Both names of an entry are its path
        relative to the store, with '/' as separator. The list is empty
        when relpath is not a directory.
        '''
        root = self.pathjoiner(self.path, relpath)
        if not os.path.isdir(root):
            return []
        # number of leading characters to cut to get a store-relative name
        striplen = len(self.path) + len(os.sep)
        found = []
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    rel = util.pconvert(full[striplen:])
                    found.append((rel, rel, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below the 'data' directory'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''return the names to copy when copying the repository'''
        names = ['requires']
        names.extend(_data.split())
        return names
192 192
class encodedstore(basicstore):
    '''store kept in the 'store' subdirectory, with encoded filenames'''
    def __init__(self, path, opener, pathjoiner):
        storepath = pathjoiner(path, 'store')
        mode = _calcmode(storepath)
        rawopener = opener(storepath)
        rawopener.createmode = mode

        def encodedopener(f, *args, **kw):
            # every name goes through encodefilename before being opened
            return rawopener(encodefilename(f), *args, **kw)

        self.pathjoiner = pathjoiner
        self.path = storepath
        self.createmode = mode
        self.opener = encodedopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        The unencoded name is None when the name found on disk cannot
        be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                decoded = decodefilename(name)
            except KeyError:
                decoded = None
            yield decoded, encoded, size

    def join(self, f):
        '''return the path of the encoded form of f inside the store'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the names to copy when copying the repository'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append(self.pathjoiner('store', f))
        return names
216 216
class fncache(object):
    '''the set of names recorded in the 'fncache' file of a store

    opener is called as opener('fncache', mode) to reach the file, which
    holds one name per line. The file is read on first use only;
    afterwards the names are served from self.entries (None until the
    file has been read).
    '''
    def __init__(self, opener):
        self.opener = opener
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A missing file gives an empty set. util.Abort is raised on an
        empty or unterminated line; self.entries is left untouched in
        that case, so that the next access reports the problem again
        instead of silently using a partial set.
        '''
        entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = entries
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                entries.add(line[:-1])
        finally:
            # do not leak the file when aborting on an invalid entry
            fp.close()
        self.entries = entries

    def rewrite(self, files):
        '''replace the content of the fncache file with files'''
        # files is walked twice below, so it must not be a one-shot iterator
        files = list(files)
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(p + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''record fn, appending it to the fncache file if it is new'''
        if self.entries is None:
            self._load()
        if fn in self.entries:
            return
        fp = self.opener('fncache', 'ab')
        try:
            fp.write(fn + '\n')
        finally:
            fp.close()
        # keep the in-memory set in sync with the file, otherwise the
        # name would be appended again on every later add
        # (fncache may contain non-existent files after rollback / strip)
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        '''iterate over a sorted snapshot of the recorded names'''
        if self.entries is None:
            self._load()
        # iterate over a copy: add() may be called while a caller is
        # still looping over the names
        return iter(sorted(self.entries))
248 258
class fncachestore(basicstore):
    '''store using hybridencode'd names, tracked in the fncache file'''
    def __init__(self, path, opener, pathjoiner):
        storepath = pathjoiner(path, 'store')
        self.pathjoiner = pathjoiner
        self.path = storepath
        self.createmode = _calcmode(storepath)
        self._op = opener(storepath)
        self._op.createmode = self.createmode
        self.fncache = fncache(self._op)

        def fncacheopener(path, mode='r', *args, **kw):
            # a 'data/' file opened for anything but reading has to be
            # recorded in the fncache
            writing = mode not in ('r', 'rb')
            if writing and path.startswith('data/'):
                if path not in self.fncache:
                    self.fncache.add(path)
            return self._op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the path of the encoded form of f inside the store'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the existing fncache entries

        Entries whose file cannot be stat'ed are skipped; when there are
        any, the fncache is rewritten without them once all the entries
        have been visited.
        '''
        stale = False
        live = []
        joinpath = self.pathjoiner
        storepath = self.path
        for name in self.fncache:
            encoded = hybridencode(name)
            try:
                size = os.stat(joinpath(storepath, encoded)).st_size
            except OSError:
                # nonexistent entry
                stale = True
                continue
            yield name, encoded, size
            live.append(name)
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(live)

    def copylist(self):
        '''return the names to copy when copying the repository'''
        names = ['requires', '00changelog.i']
        for f in (_data + ' dh fncache').split():
            names.append(self.pathjoiner('store', f))
        return names
287 302
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, anything else selects basicstore. pathjoiner
    defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storeclass = basicstore
    elif 'fncache' in requirements:
        storeclass = fncachestore
    else:
        storeclass = encodedstore
    return storeclass(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now