##// END OF EJS Templates
store: eliminate reference cycle in fncachestore...
Simon Heimberg -
r9133:996c1cd8 default
parent child Browse files
Show More
@@ -1,332 +1,333 b''
# store.py - repository store handling for Mercurial
#
# Copyright 2008 Matt Mackall <mpm@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2, incorporated herein by reference.
7
7
8 from i18n import _
8 from i18n import _
9 import osutil, util
9 import osutil, util
10 import os, stat
10 import os, stat
11
11
# hash constructor taken from util; hybridencode() uses it to digest paths
# whose default encoding is too long
_sha = util.sha1
13
13
# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def encodedir(path):
    '''escape directory names ending in .hg, .i or .d inside data/

    Paths that do not start with 'data/' are returned unchanged.
    Otherwise every '.hg/', '.i/' and '.d/' in the path is rewritten to
    '.hg.hg/', '.i.hg/' and '.d.hg/' respectively. decodedir() undoes
    this.
    '''
    if not path.startswith('data/'):
        return path
    # '.hg/' has to be handled first: doing it after the other two would
    # escape the '.hg/' suffixes they have just added a second time
    return (path
            .replace(".hg/", ".hg.hg/")
            .replace(".i/", ".i.hg/")
            .replace(".d/", ".d.hg/"))
23
23
def decodedir(path):
    '''undo the directory escaping done by encodedir

    Paths that do not start with 'data/' are returned unchanged.
    Otherwise every '.d.hg/', '.i.hg/' and '.hg.hg/' in the path is
    rewritten to '.d/', '.i/' and '.hg/' respectively.
    '''
    if not path.startswith('data/'):
        return path
    # reverse order of encodedir: '.hg.hg/' is unescaped last
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))
31
31
def _buildencodefun():
    '''build the reversible filename encoder and its decoder

    Returns a pair (encode, decode).

    encode(s) applies encodedir to s and then replaces every character
    through a table in which
    - characters 0-31, characters 126-255 and the characters listed in
      win_reserved map to '~xx' (xx is the two digit hex code),
    - the letters A-Z and the underscore map to '_' followed by the
      lowercased character,
    - every other character maps to itself.

    decode(s) reverses the table and then applies decodedir. It raises
    KeyError when s contains a sequence that matches no table entry.
    '''
    e = '_'
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    for x in (list(range(32)) + list(range(126, 256)) + win_reserved):
        cmap[chr(x)] = "~%02x" % x
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    # inverse table: encoded token -> original character
    dmap = {}
    for k, v in cmap.items():
        dmap[v] = k
    def decode(s):
        i = 0
        while i < len(s):
            # encoded tokens are 1 ('a'), 2 ('_a') or 3 ('~7e') chars long;
            # try the candidates from shortest to longest
            for length in range(1, 4):
                try:
                    yield dmap[s[i:i + length]]
                    i += length
                    break
                except KeyError:
                    pass
            else:
                # no token of any length matches at position i
                raise KeyError
    return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
            lambda s: decodedir("".join(list(decode(s)))))

encodefilename, decodefilename = _buildencodefun()
59
59
def _build_lower_encodefun():
    '''build the non-reversible lowercasing encoder

    Returns a function that replaces every character of its argument
    through a table in which
    - characters 0-31, characters 126-255 and the characters listed in
      win_reserved map to '~xx' (xx is the two digit hex code),
    - the letters A-Z map to their lowercase form,
    - every other character maps to itself.

    Unlike the table built by _buildencodefun, uppercase letters get no
    marker here, so the result cannot be decoded.
    '''
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    cmap = dict([(chr(x), chr(x)) for x in range(127)])
    for x in (list(range(32)) + list(range(126, 256)) + win_reserved):
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])

lowerencode = _build_lower_encodefun()
70
70
# base names that auxencode() must mask
_windows_reserved_filenames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''mask reserved names and trailing periods/spaces in path components

    The path is split on '/'. For every non-empty component:
    - if the part before its first '.' is one of
      _windows_reserved_filenames, the third character is replaced by
      its '~xx' hex escape ('aux' -> 'au~78'),
    - if it ends in a period or a space, that last character is replaced
      by its '~xx' hex escape ('foo...' -> 'foo..~2e').
    Empty components are kept, so the number of '/' is unchanged.
    '''
    res = []
    for n in path.split('/'):
        if n:
            base = n.split('.')[0]
            if base and (base in _windows_reserved_filenames):
                # encode third letter ('aux' -> 'au~78')
                ec = "~%02x" % ord(n[2])
                n = n[0:2] + ec + n[3:]
            if n[-1] in '. ':
                # encode last period or space ('foo...' -> 'foo..~2e')
                n = n[:-1] + "~%02x" % ord(n[-1])
        res.append(n)
    return '/'.join(res)
88
88
# encoded paths longer than this are replaced by the hashed encoding
MAX_PATH_LEN_IN_HGSTORE = 120
# number of leading characters kept from each directory level when hashing
DIR_PREFIX_LEN = 8
# limit for the joined, shortened directory levels of a hashed path
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.
    Paths that do not begin with 'data/' are returned unchanged.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to DIR_PREFIX_LEN characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _MAX_SHORTENED_DIRS_LEN.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
    of the basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    if not path.startswith('data/'):
        return path
    # escape directories ending with .i and .d
    path = encodedir(path)
    ndpath = path[len('data/'):]
    res = 'data/' + auxencode(encodefilename(ndpath))
    if len(res) > MAX_PATH_LEN_IN_HGSTORE:
        # the digest covers the dir-escaped path including 'data/'
        digest = _sha(path).hexdigest()
        aep = auxencode(lowerencode(ndpath))
        _root, ext = os.path.splitext(aep)
        parts = aep.split('/')
        basename = parts[-1]
        sdirs = []
        for p in parts[:-1]:
            d = p[:DIR_PREFIX_LEN]
            # NOTE(review): an empty component (e.g. from '//') makes
            # d[-1] raise IndexError - presumably callers never pass such
            # paths; confirm
            if d[-1] in '. ':
                # Windows can't access dirs ending in period or space
                d = d[:-1] + '_'
            # the length test counts a leading '/' even for the first
            # level; this is part of the resulting names, do not "fix" it
            t = '/'.join(sdirs) + '/' + d
            if len(t) > _MAX_SHORTENED_DIRS_LEN:
                break
            sdirs.append(d)
        dirs = '/'.join(sdirs)
        if len(dirs) > 0:
            dirs += '/'
        res = 'dh/' + dirs + digest + ext
        space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
        if space_left > 0:
            filler = basename[:space_left]
            res = 'dh/' + dirs + filler + digest + ext
    return res
154
154
def _calcmode(path):
    '''return the mode to apply to files created below path, or None

    Returns the st_mode of path. Returns None instead when path cannot
    be stat'ed, or when its permission bits already equal what the
    current umask (util.umask) produces, so that no chmod is needed.
    '''
    # all user/group/other permission bits (octal 777), spelled with stat
    # constants because the bare 0777 literal only parses on Python 2
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
        # avoid some useless chmods
        if (permbits & ~util.umask) == (permbits & mode):
            mode = None
    except OSError:
        mode = None
    return mode
165
165
# space separated store entries that the copylist() methods report
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
167
167
class basicstore(object):
    '''base class for local repository stores

    File names are only passed through encodedir before they reach the
    underlying opener.
    '''
    def __init__(self, path, opener, pathjoiner):
        '''set up the store rooted at path

        opener is called with the store path and must return a callable
        used to open files; pathjoiner joins two path components.
        '''
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        op = opener(self.path)
        op.createmode = self.createmode
        self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)

    def join(self, f):
        '''return the path of the store file f'''
        return self.pathjoiner(self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Lists the regular files ending in '.d' or '.i' found in relpath
        (relative to the store). Subdirectories are only visited if
        recurse is true. The list is empty if relpath is not a directory.
        '''
        path = self.pathjoiner(self.path, relpath)
        # number of leading characters to drop to make a path store-relative
        striplen = len(self.path) + len(os.sep)
        found = []
        if os.path.isdir(path):
            visit = [path]
            while visit:
                p = visit.pop()
                for f, kind, st in osutil.listdir(p, stat=True):
                    fp = self.pathjoiner(p, f)
                    if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                        n = util.pconvert(fp[striplen:])
                        found.append((decodedir(n), n, st.st_size))
                    elif kind == stat.S_IFDIR and recurse:
                        visit.append(fp)
        return sorted(found)

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for x in self.datafiles():
            yield x
        # yield manifest before changelog
        for x in reversed(self._walk('', False)):
            yield x

    def copylist(self):
        '''return the list of entries named by this store layout'''
        return ['requires'] + _data.split()
213
213
class encodedstore(basicstore):
    '''store kept in the 'store' subdirectory with encoded file names

    File names are passed through encodefilename before they reach the
    underlying opener.
    '''
    def __init__(self, path, opener, pathjoiner):
        '''set up the store located in the 'store' directory below path'''
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        self.opener = lambda f, *args, **kw: op(encodefilename(f), *args, **kw)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for a file whose name cannot be decoded.
        '''
        for a, b, size in self._walk('data', True):
            try:
                a = decodefilename(a)
            except KeyError:
                # not a name produced by encodefilename
                a = None
            yield a, b, size

    def join(self, f):
        '''return the path of the store file f'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the list of entries named by this store layout'''
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in _data.split()])
237
237
class fncache(object):
    '''set of file names backed by the 'fncache' file

    The file is read lazily on first use. It holds one name per line,
    each terminated by a newline.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        '''opener is the callable used to open the 'fncache' file'''
        self.opener = opener
        # None means "not loaded yet", see _load
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened is treated as empty. Raises
        util.Abort on a line that is empty or lacks the final newline.
        '''
        self.entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                self.entries.add(decodedir(line[:-1]))
        finally:
            # also release the file when aborting on an invalid entry
            fp.close()

    def rewrite(self, files):
        '''replace the content of the fncache file by the names in files'''
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''append fn to the fncache file and remember it in memory'''
        if self.entries is None:
            self._load()
        fp = self.opener('fncache', 'ab')
        try:
            fp.write(encodedir(fn) + '\n')
        finally:
            fp.close()
        # keep the in-memory set in sync with the file: otherwise the same
        # name is appended again on the next membership test, and a later
        # rewrite() based on iterating this object would drop it
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
281
281
class fncachestore(basicstore):
    '''store using hybridencode for file names and an fncache file

    Because the hashed form of hybridencode is not reversible, the
    unencoded names of the files below data/ are recorded in an fncache
    object (self.fncache).
    '''
    def __init__(self, path, opener, pathjoiner):
        '''set up the store located in the 'store' directory below path'''
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc

        # The closure only refers to the locals op and fnc, never to self:
        # storing a function that references self on self would create a
        # reference cycle.
        def fncacheopener(path, mode='r', *args, **kw):
            # record data files the first time they are opened for writing
            if (mode not in ('r', 'rb')
                and path.startswith('data/')
                and path not in fnc):
                fnc.add(path)
            return op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the path of the store file f'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries

        Entries whose file cannot be stat'ed are skipped. If there were
        any, the fncache is rewritten without them once all entries have
        been yielded.
        '''
        rewrite = False
        existing = []
        pjoin = self.pathjoiner
        spath = self.path
        for f in self.fncache:
            ef = hybridencode(f)
            try:
                st = os.stat(pjoin(spath, ef))
                yield f, ef, st.st_size
                existing.append(f)
            except OSError:
                # nonexistent entry
                rewrite = True
        if rewrite:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(existing)

    def copylist(self):
        '''return the list of entries named by this store layout'''
        d = _data + ' dh fncache'
        return (['requires', '00changelog.i'] +
                [self.pathjoiner('store', f) for f in d.split()])
325
326
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the given requirements

    - 'store' and 'fncache' in requirements: fncachestore
    - only 'store' in requirements: encodedstore
    - otherwise: basicstore

    path, opener and pathjoiner are handed to the store constructor;
    pathjoiner defaults to os.path.join.
    '''
    pathjoiner = pathjoiner or os.path.join
    if 'store' in requirements:
        if 'fncache' in requirements:
            return fncachestore(path, opener, pathjoiner)
        return encodedstore(path, opener, pathjoiner)
    return basicstore(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now