store: refactor the fncache handling...
Benoit Boissinot
r8530:03196ac9 default
@@ -1,294 +1,309 @@
 # store.py - repository store handling for Mercurial
 #
 # Copyright 2008 Matt Mackall <mpm@selenic.com>
 #
 # This software may be used and distributed according to the terms of the
 # GNU General Public License version 2, incorporated herein by reference.

 from i18n import _
 import osutil, util
 import os, stat

 _sha = util.sha1

 def _buildencodefun():
     e = '_'
     win_reserved = [ord(x) for x in '\\:*?"<>|']
     cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
     for x in (range(32) + range(126, 256) + win_reserved):
         cmap[chr(x)] = "~%02x" % x
     for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
         cmap[chr(x)] = e + chr(x).lower()
     dmap = {}
     for k, v in cmap.iteritems():
         dmap[v] = k
     def decode(s):
         i = 0
         while i < len(s):
             for l in xrange(1, 4):
                 try:
                     yield dmap[s[i:i+l]]
                     i += l
                     break
                 except KeyError:
                     pass
             else:
                 raise KeyError
     return (lambda s: "".join([cmap[c] for c in s]),
             lambda s: "".join(list(decode(s))))

 encodefilename, decodefilename = _buildencodefun()

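A minimal sketch of the round-trip the two helpers above provide; the values are worked out by hand from the cmap table built in _buildencodefun and are illustrative only:

    enc, dec = _buildencodefun()
    enc('X11/Config.h')        # -> '_x11/_config.h'  (uppercase 'X' becomes '_x')
    enc('foo_bar')             # -> 'foo__bar'        (the escape char '_' is doubled)
    dec(enc('X11/Config.h'))   # -> 'X11/Config.h'    (decode() reverses the mapping)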
 def _build_lower_encodefun():
     win_reserved = [ord(x) for x in '\\:*?"<>|']
     cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
     for x in (range(32) + range(126, 256) + win_reserved):
         cmap[chr(x)] = "~%02x" % x
     for x in range(ord("A"), ord("Z")+1):
         cmap[chr(x)] = chr(x).lower()
     return lambda s: "".join([cmap[c] for c in s])

 lowerencode = _build_lower_encodefun()

 _windows_reserved_filenames = '''con prn aux nul
 com1 com2 com3 com4 com5 com6 com7 com8 com9
 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
 def auxencode(path):
     res = []
     for n in path.split('/'):
         if n:
             base = n.split('.')[0]
             if base and (base in _windows_reserved_filenames):
                 # encode third letter ('aux' -> 'au~78')
                 ec = "~%02x" % ord(n[2])
                 n = n[0:2] + ec + n[3:]
             if n[-1] in '. ':
                 # encode last period or space ('foo...' -> 'foo..~2e')
                 n = n[:-1] + "~%02x" % ord(n[-1])
             res.append(n)
     return '/'.join(res)

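A rough sketch of what auxencode and lowerencode yield for the Windows corner cases mentioned in the comments above (hex values computed from ord(), illustrative only):

    auxencode('aux.txt/bar')   # -> 'au~78.txt/bar'  ('aux' is reserved; 'x' is 0x78)
    auxencode('com3/x.')       # -> 'co~6d3/x~2e'    ('m' is 0x6d, trailing '.' is 0x2e)
    lowerencode('Foo.BAR')     # -> 'foo.bar'        (lossy: the original case is not recoverable)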
 MAX_PATH_LEN_IN_HGSTORE = 120
 DIR_PREFIX_LEN = 8
 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
 def hybridencode(path):
     '''encodes path with a length limit

     Encodes all paths that begin with 'data/', according to the following.

     Default encoding (reversible):

     Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
     characters are encoded as '~xx', where xx is the two digit hex code
     of the character (see encodefilename).
     Relevant path components consisting of Windows reserved filenames are
     masked by encoding the third character ('aux' -> 'au~78', see auxencode).

     Hashed encoding (not reversible):

     If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
     non-reversible hybrid hashing of the path is done instead.
     This encoding uses up to DIR_PREFIX_LEN characters of all directory
     levels of the lowerencoded path, but not more levels than can fit into
     _MAX_SHORTENED_DIRS_LEN.
     Then follows the filler followed by the sha digest of the full path.
     The filler is the beginning of the basename of the lowerencoded path
     (the basename is everything after the last path separator). The filler
     is as long as possible, filling in characters from the basename until
     the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
     of the basename have been taken).
     The extension (e.g. '.i' or '.d') is preserved.

     The string 'data/' at the beginning is replaced with 'dh/', if the hashed
     encoding was used.
     '''
     if not path.startswith('data/'):
         return path
     ndpath = path[len('data/'):]
     res = 'data/' + auxencode(encodefilename(ndpath))
     if len(res) > MAX_PATH_LEN_IN_HGSTORE:
         digest = _sha(path).hexdigest()
         aep = auxencode(lowerencode(ndpath))
         _root, ext = os.path.splitext(aep)
         parts = aep.split('/')
         basename = parts[-1]
         sdirs = []
         for p in parts[:-1]:
             d = p[:DIR_PREFIX_LEN]
             if d[-1] in '. ':
                 # Windows can't access dirs ending in period or space
                 d = d[:-1] + '_'
             t = '/'.join(sdirs) + '/' + d
             if len(t) > _MAX_SHORTENED_DIRS_LEN:
                 break
             sdirs.append(d)
         dirs = '/'.join(sdirs)
         if len(dirs) > 0:
             dirs += '/'
         res = 'dh/' + dirs + digest + ext
         space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
         if space_left > 0:
             filler = basename[:space_left]
             res = 'dh/' + dirs + filler + digest + ext
     return res

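A sketch of the two branches the docstring describes, assuming the short path stays below MAX_PATH_LEN_IN_HGSTORE (the hashed form is shown schematically because it embeds a sha1 digest):

    hybridencode('data/Some/File.TXT.i')
    # -> 'data/_some/_file._t_x_t.i'   (well under 120 chars: reversible encoding kept)

    # A default-encoded path longer than 120 chars is replaced by something like
    #   'dh/' + up-to-8-char directory prefixes + filler-from-basename
    #        + 40-char sha1 hexdigest of the full path + original extension
    # which is why such entries can only be recovered from the fncache file.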
 def _calcmode(path):
     try:
         # files in .hg/ will be created using this mode
         mode = os.stat(path).st_mode
         # avoid some useless chmods
         if (0777 & ~util.umask) == (0777 & mode):
             mode = None
     except OSError:
         mode = None
     return mode

 _data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

 class basicstore:
     '''base class for local repository stores'''
     def __init__(self, path, opener, pathjoiner):
         self.pathjoiner = pathjoiner
         self.path = path
         self.createmode = _calcmode(path)
         self.opener = opener(self.path)
         self.opener.createmode = self.createmode

     def join(self, f):
         return self.pathjoiner(self.path, f)

     def _walk(self, relpath, recurse):
         '''yields (unencoded, encoded, size)'''
         path = self.pathjoiner(self.path, relpath)
         striplen = len(self.path) + len(os.sep)
         l = []
         if os.path.isdir(path):
             visit = [path]
             while visit:
                 p = visit.pop()
                 for f, kind, st in osutil.listdir(p, stat=True):
                     fp = self.pathjoiner(p, f)
                     if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
                         n = util.pconvert(fp[striplen:])
                         l.append((n, n, st.st_size))
                     elif kind == stat.S_IFDIR and recurse:
                         visit.append(fp)
         return sorted(l)

     def datafiles(self):
         return self._walk('data', True)

     def walk(self):
         '''yields (unencoded, encoded, size)'''
         # yield data files first
         for x in self.datafiles():
             yield x
         # yield manifest before changelog
         for x in reversed(self._walk('', False)):
             yield x

     def copylist(self):
         return ['requires'] + _data.split()

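For orientation, a small usage sketch of the walk() ordering above; repo here is assumed to be a localrepository object whose store attribute is one of the classes in this file:

    # data files stream first, then 00manifest.*, then 00changelog.*
    for unencoded, encoded, size in repo.store.walk():
        print unencoded, size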
 class encodedstore(basicstore):
     def __init__(self, path, opener, pathjoiner):
         self.pathjoiner = pathjoiner
         self.path = self.pathjoiner(path, 'store')
         self.createmode = _calcmode(self.path)
         op = opener(self.path)
         op.createmode = self.createmode
         self.opener = lambda f, *args, **kw: op(encodefilename(f), *args, **kw)

     def datafiles(self):
         for a, b, size in self._walk('data', True):
             try:
                 a = decodefilename(a)
             except KeyError:
                 a = None
             yield a, b, size

     def join(self, f):
         return self.pathjoiner(self.path, encodefilename(f))

     def copylist(self):
         return (['requires', '00changelog.i'] +
                 [self.pathjoiner('store', f) for f in _data.split()])

-def fncache(opener):
-    '''yields the entries in the fncache file'''
-    try:
-        fp = opener('fncache', mode='rb')
-    except IOError:
-        # skip nonexistent file
-        return
-    for n, line in enumerate(fp):
-        if (len(line) < 2) or (line[-1] != '\n'):
-            t = _('invalid entry in fncache, line %s') % (n + 1)
-            raise util.Abort(t)
-        yield line[:-1]
-    fp.close()
-
-class fncacheopener(object):
-    def __init__(self, opener):
-        self.opener = opener
-        self.entries = None
-
-    def loadfncache(self):
-        self.entries = set(fncache(self.opener))
-
-    def __call__(self, path, mode='r', *args, **kw):
-        if mode not in ('r', 'rb') and path.startswith('data/'):
-            if self.entries is None:
-                self.loadfncache()
-            if path not in self.entries:
-                self.opener('fncache', 'ab').write(path + '\n')
-                # fncache may contain non-existent files after rollback / strip
-                self.entries.add(path)
-        return self.opener(hybridencode(path), mode, *args, **kw)
+class fncache(object):
+    def __init__(self, opener):
+        self.opener = opener
+        self.entries = None
+
+    def _load(self):
+        '''fill the entries from the fncache file'''
+        self.entries = set()
+        try:
+            fp = self.opener('fncache', mode='rb')
+        except IOError:
+            # skip nonexistent file
+            return
+        for n, line in enumerate(fp):
+            if (len(line) < 2) or (line[-1] != '\n'):
+                t = _('invalid entry in fncache, line %s') % (n + 1)
+                raise util.Abort(t)
+            self.entries.add(line[:-1])
+        fp.close()
+
+    def rewrite(self, files):
+        fp = self.opener('fncache', mode='wb')
+        for p in files:
+            fp.write(p + '\n')
+        fp.close()
+        self.entries = set(files)
+
+    def add(self, fn):
+        if self.entries is None:
+            self._load()
+        self.opener('fncache', 'ab').write(fn + '\n')
+
+    def __contains__(self, fn):
+        if self.entries is None:
+            self._load()
+        return fn in self.entries
+
+    def __iter__(self):
+        if self.entries is None:
+            self._load()
+        return iter(self.entries)

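The refactoring replaces the old fncache() generator and fncacheopener wrapper with a single object that owns loading, membership, appending and rewriting. A hedged sketch of how a caller drives it (op stands in for the store opener that fncachestore creates below):

    fnc = fncache(op)
    if 'data/some/file.i' not in fnc:   # __contains__ lazily calls _load()
        fnc.add('data/some/file.i')     # appends one line to the fncache file
    names = list(fnc)                   # __iter__ over all known entries
    fnc.rewrite(names)                  # rewrites the file, e.g. after strip/rollback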
 class fncachestore(basicstore):
     def __init__(self, path, opener, pathjoiner):
         self.pathjoiner = pathjoiner
         self.path = self.pathjoiner(path, 'store')
         self.createmode = _calcmode(self.path)
         self._op = opener(self.path)
         self._op.createmode = self.createmode
-        self.opener = fncacheopener(self._op)
+        self.fncache = fncache(self._op)
+
+        def fncacheopener(path, mode='r', *args, **kw):
+            if (mode not in ('r', 'rb')
+                and path.startswith('data/')
+                and path not in self.fncache):
+                self.fncache.add(path)
+            return self._op(hybridencode(path), mode, *args, **kw)
+        self.opener = fncacheopener

     def join(self, f):
         return self.pathjoiner(self.path, hybridencode(f))

     def datafiles(self):
         rewrite = False
         existing = []
         pjoin = self.pathjoiner
         spath = self.path
-        for f in fncache(self._op):
+        for f in self.fncache:
             ef = hybridencode(f)
             try:
                 st = os.stat(pjoin(spath, ef))
                 yield f, ef, st.st_size
                 existing.append(f)
             except OSError:
                 # nonexistent entry
                 rewrite = True
         if rewrite:
             # rewrite fncache to remove nonexistent entries
             # (may be caused by rollback / strip)
-            fp = self._op('fncache', mode='wb')
-            for p in existing:
-                fp.write(p + '\n')
-            fp.close()
+            self.fncache.rewrite(existing)

     def copylist(self):
         d = _data + ' dh fncache'
         return (['requires', '00changelog.i'] +
                 [self.pathjoiner('store', f) for f in d.split()])

 def store(requirements, path, opener, pathjoiner=None):
     pathjoiner = pathjoiner or os.path.join
     if 'store' in requirements:
         if 'fncache' in requirements:
             return fncachestore(path, opener, pathjoiner)
         return encodedstore(path, opener, pathjoiner)
     return basicstore(path, opener, pathjoiner)
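Finally, a sketch of how the store() factory picks an implementation from the repository requirements; the opener argument is assumed to be a util.opener-style factory and the paths are placeholders:

    store(set(), '/repo/.hg', opener)                      # -> basicstore (legacy layout)
    store(set(['store']), '/repo/.hg', opener)             # -> encodedstore
    store(set(['store', 'fncache']), '/repo/.hg', opener)  # -> fncachestore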