##// END OF EJS Templates
fncachestore: copy dh directory before the manifest...
Adrian Buehlmann -
r13169:4c1fbed0 stable
parent child Browse files
Show More
@@ -1,338 +1,339 b''
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from i18n import _
8 from i18n import _
9 import osutil, util
9 import osutil, util
10 import os, stat
10 import os, stat
11
11
12 _sha = util.sha1
12 _sha = util.sha1
13
13
# Escaping these directory suffixes avoids a collision between a file
# named foo and a directory named foo.i or foo.d
def encodedir(path):
    '''escape directory names ending in .hg, .i or .d below data/

    Every occurrence of '.hg/', '.i/' and '.d/' (in that order) gets an
    extra '.hg' inserted before the slash. Paths that do not start with
    'data/' are returned unchanged.
    '''
    if path.startswith('data/'):
        for suffix in (".hg/", ".i/", ".d/"):
            # '.i/' -> '.i.hg/', and so on
            path = path.replace(suffix, suffix[:-1] + ".hg/")
    return path
23
23
def decodedir(path):
    '''strip the '.hg' directory escapes from a path below data/

    Replaces '.d.hg/', '.i.hg/' and '.hg.hg/' (in that order) by '.d/',
    '.i/' and '.hg/'. Paths that do not start with 'data/' or that
    contain no '.hg/' at all are returned unchanged.
    '''
    if ".hg/" in path and path.startswith('data/'):
        unescape = ((".d.hg/", ".d/"),
                    (".i.hg/", ".i/"),
                    (".hg.hg/", ".hg/"))
        for escaped, plain in unescape:
            path = path.replace(escaped, plain)
    return path
31
31
32 def _buildencodefun():
32 def _buildencodefun():
33 e = '_'
33 e = '_'
34 win_reserved = [ord(x) for x in '\\:*?"<>|']
34 win_reserved = [ord(x) for x in '\\:*?"<>|']
35 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
35 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
36 for x in (range(32) + range(126, 256) + win_reserved):
36 for x in (range(32) + range(126, 256) + win_reserved):
37 cmap[chr(x)] = "~%02x" % x
37 cmap[chr(x)] = "~%02x" % x
38 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
38 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
39 cmap[chr(x)] = e + chr(x).lower()
39 cmap[chr(x)] = e + chr(x).lower()
40 dmap = {}
40 dmap = {}
41 for k, v in cmap.iteritems():
41 for k, v in cmap.iteritems():
42 dmap[v] = k
42 dmap[v] = k
43 def decode(s):
43 def decode(s):
44 i = 0
44 i = 0
45 while i < len(s):
45 while i < len(s):
46 for l in xrange(1, 4):
46 for l in xrange(1, 4):
47 try:
47 try:
48 yield dmap[s[i:i + l]]
48 yield dmap[s[i:i + l]]
49 i += l
49 i += l
50 break
50 break
51 except KeyError:
51 except KeyError:
52 pass
52 pass
53 else:
53 else:
54 raise KeyError
54 raise KeyError
55 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
55 return (lambda s: "".join([cmap[c] for c in encodedir(s)]),
56 lambda s: decodedir("".join(list(decode(s)))))
56 lambda s: decodedir("".join(list(decode(s)))))
57
57
58 encodefilename, decodefilename = _buildencodefun()
58 encodefilename, decodefilename = _buildencodefun()
59
59
60 def _build_lower_encodefun():
60 def _build_lower_encodefun():
61 win_reserved = [ord(x) for x in '\\:*?"<>|']
61 win_reserved = [ord(x) for x in '\\:*?"<>|']
62 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
62 cmap = dict([(chr(x), chr(x)) for x in xrange(127)])
63 for x in (range(32) + range(126, 256) + win_reserved):
63 for x in (range(32) + range(126, 256) + win_reserved):
64 cmap[chr(x)] = "~%02x" % x
64 cmap[chr(x)] = "~%02x" % x
65 for x in range(ord("A"), ord("Z")+1):
65 for x in range(ord("A"), ord("Z")+1):
66 cmap[chr(x)] = chr(x).lower()
66 cmap[chr(x)] = chr(x).lower()
67 return lambda s: "".join([cmap[c] for c in s])
67 return lambda s: "".join([cmap[c] for c in s])
68
68
69 lowerencode = _build_lower_encodefun()
69 lowerencode = _build_lower_encodefun()
70
70
71 _windows_reserved_filenames = '''con prn aux nul
71 _windows_reserved_filenames = '''con prn aux nul
72 com1 com2 com3 com4 com5 com6 com7 com8 com9
72 com1 com2 com3 com4 com5 com6 com7 com8 com9
73 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
73 lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
74 def _auxencode(path, dotencode):
74 def _auxencode(path, dotencode):
75 res = []
75 res = []
76 for n in path.split('/'):
76 for n in path.split('/'):
77 if n:
77 if n:
78 base = n.split('.')[0]
78 base = n.split('.')[0]
79 if base and (base in _windows_reserved_filenames):
79 if base and (base in _windows_reserved_filenames):
80 # encode third letter ('aux' -> 'au~78')
80 # encode third letter ('aux' -> 'au~78')
81 ec = "~%02x" % ord(n[2])
81 ec = "~%02x" % ord(n[2])
82 n = n[0:2] + ec + n[3:]
82 n = n[0:2] + ec + n[3:]
83 if n[-1] in '. ':
83 if n[-1] in '. ':
84 # encode last period or space ('foo...' -> 'foo..~2e')
84 # encode last period or space ('foo...' -> 'foo..~2e')
85 n = n[:-1] + "~%02x" % ord(n[-1])
85 n = n[:-1] + "~%02x" % ord(n[-1])
86 if dotencode and n[0] in '. ':
86 if dotencode and n[0] in '. ':
87 n = "~%02x" % ord(n[0]) + n[1:]
87 n = "~%02x" % ord(n[0]) + n[1:]
88 res.append(n)
88 res.append(n)
89 return '/'.join(res)
89 return '/'.join(res)
90
90
91 MAX_PATH_LEN_IN_HGSTORE = 120
91 MAX_PATH_LEN_IN_HGSTORE = 120
92 DIR_PREFIX_LEN = 8
92 DIR_PREFIX_LEN = 8
93 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
93 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
94 def _hybridencode(path, auxencode):
94 def _hybridencode(path, auxencode):
95 '''encodes path with a length limit
95 '''encodes path with a length limit
96
96
97 Encodes all paths that begin with 'data/', according to the following.
97 Encodes all paths that begin with 'data/', according to the following.
98
98
99 Default encoding (reversible):
99 Default encoding (reversible):
100
100
101 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
101 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
102 characters are encoded as '~xx', where xx is the two digit hex code
102 characters are encoded as '~xx', where xx is the two digit hex code
103 of the character (see encodefilename).
103 of the character (see encodefilename).
104 Relevant path components consisting of Windows reserved filenames are
104 Relevant path components consisting of Windows reserved filenames are
105 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
105 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
106
106
107 Hashed encoding (not reversible):
107 Hashed encoding (not reversible):
108
108
109 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
109 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
110 non-reversible hybrid hashing of the path is done instead.
110 non-reversible hybrid hashing of the path is done instead.
111 This encoding uses up to DIR_PREFIX_LEN characters of all directory
111 This encoding uses up to DIR_PREFIX_LEN characters of all directory
112 levels of the lowerencoded path, but not more levels than can fit into
112 levels of the lowerencoded path, but not more levels than can fit into
113 _MAX_SHORTENED_DIRS_LEN.
113 _MAX_SHORTENED_DIRS_LEN.
114 Then follows the filler followed by the sha digest of the full path.
114 Then follows the filler followed by the sha digest of the full path.
115 The filler is the beginning of the basename of the lowerencoded path
115 The filler is the beginning of the basename of the lowerencoded path
116 (the basename is everything after the last path separator). The filler
116 (the basename is everything after the last path separator). The filler
117 is as long as possible, filling in characters from the basename until
117 is as long as possible, filling in characters from the basename until
118 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
118 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
119 of the basename have been taken).
119 of the basename have been taken).
120 The extension (e.g. '.i' or '.d') is preserved.
120 The extension (e.g. '.i' or '.d') is preserved.
121
121
122 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
122 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
123 encoding was used.
123 encoding was used.
124 '''
124 '''
125 if not path.startswith('data/'):
125 if not path.startswith('data/'):
126 return path
126 return path
127 # escape directories ending with .i and .d
127 # escape directories ending with .i and .d
128 path = encodedir(path)
128 path = encodedir(path)
129 ndpath = path[len('data/'):]
129 ndpath = path[len('data/'):]
130 res = 'data/' + auxencode(encodefilename(ndpath))
130 res = 'data/' + auxencode(encodefilename(ndpath))
131 if len(res) > MAX_PATH_LEN_IN_HGSTORE:
131 if len(res) > MAX_PATH_LEN_IN_HGSTORE:
132 digest = _sha(path).hexdigest()
132 digest = _sha(path).hexdigest()
133 aep = auxencode(lowerencode(ndpath))
133 aep = auxencode(lowerencode(ndpath))
134 _root, ext = os.path.splitext(aep)
134 _root, ext = os.path.splitext(aep)
135 parts = aep.split('/')
135 parts = aep.split('/')
136 basename = parts[-1]
136 basename = parts[-1]
137 sdirs = []
137 sdirs = []
138 for p in parts[:-1]:
138 for p in parts[:-1]:
139 d = p[:DIR_PREFIX_LEN]
139 d = p[:DIR_PREFIX_LEN]
140 if d[-1] in '. ':
140 if d[-1] in '. ':
141 # Windows can't access dirs ending in period or space
141 # Windows can't access dirs ending in period or space
142 d = d[:-1] + '_'
142 d = d[:-1] + '_'
143 t = '/'.join(sdirs) + '/' + d
143 t = '/'.join(sdirs) + '/' + d
144 if len(t) > _MAX_SHORTENED_DIRS_LEN:
144 if len(t) > _MAX_SHORTENED_DIRS_LEN:
145 break
145 break
146 sdirs.append(d)
146 sdirs.append(d)
147 dirs = '/'.join(sdirs)
147 dirs = '/'.join(sdirs)
148 if len(dirs) > 0:
148 if len(dirs) > 0:
149 dirs += '/'
149 dirs += '/'
150 res = 'dh/' + dirs + digest + ext
150 res = 'dh/' + dirs + digest + ext
151 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
151 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
152 if space_left > 0:
152 if space_left > 0:
153 filler = basename[:space_left]
153 filler = basename[:space_left]
154 res = 'dh/' + dirs + filler + digest + ext
154 res = 'dh/' + dirs + filler + digest + ext
155 return res
155 return res
156
156
157 def _calcmode(path):
157 def _calcmode(path):
158 try:
158 try:
159 # files in .hg/ will be created using this mode
159 # files in .hg/ will be created using this mode
160 mode = os.stat(path).st_mode
160 mode = os.stat(path).st_mode
161 # avoid some useless chmods
161 # avoid some useless chmods
162 if (0777 & ~util.umask) == (0777 & mode):
162 if (0777 & ~util.umask) == (0777 & mode):
163 mode = None
163 mode = None
164 except OSError:
164 except OSError:
165 mode = None
165 mode = None
166 return mode
166 return mode
167
167
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        storeop = opener(self.path)
        storeop.createmode = self.createmode

        def diropener(f, *args, **kw):
            # only directory names are escaped in this store layout
            return storeop(encodedir(f), *args, **kw)
        self.opener = diropener

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.pathjoiner(self.path, encodedir(f))

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size)

        Lists the regular files ending in .d or .i below relpath,
        descending into subdirectories only if recurse is true.
        '''
        root = self.pathjoiner(self.path, relpath)
        # length of the store path prefix (plus separator) to cut off
        striplen = len(self.path) + len(os.sep)
        found = []
        if not os.path.isdir(root):
            return sorted(found)
        pending = [root]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[striplen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        return sorted(found)

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        for entry in reversed(toplevel):
            yield entry

    def copylist(self):
        '''return the list of names: 'requires' followed by _data'''
        names = ['requires']
        names.extend(_data.split())
        return names
215
215
class encodedstore(basicstore):
    '''store kept in <path>/store with names run through encodefilename'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        storeop = opener(self.path)
        storeop.createmode = self.createmode

        def encodingopener(f, *args, **kw):
            return storeop(encodefilename(f), *args, **kw)
        self.opener = encodingopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None for a name that decodefilename cannot decode.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return 'requires', '00changelog.i' and the _data names in store/'''
        instore = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + instore
239
239
class fncache(object):
    '''set of store file names, persisted in the 'fncache' file

    The file holds one name per line. Entries are loaded lazily on first
    use; self.entries is None until then.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A missing file (IOError on open) yields an empty set. Raises
        util.Abort on a line that is empty or lacks the trailing newline.
        '''
        self.entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                self.entries.add(decodedir(line[:-1]))
        finally:
            # close the file even when aborting on an invalid entry
            fp.close()

    def rewrite(self, files):
        '''replace the fncache file content and the entries by files'''
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            # do not leak the handle if a write fails
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''append fn to the fncache file unless it is already an entry'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            fp = self.opener('fncache', 'ab')
            try:
                fp.write(encodedir(fn) + '\n')
            finally:
                # close explicitly instead of relying on garbage collection
                fp.close()
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
285
285
class fncachestore(basicstore):
    '''store in <path>/store that tracks its data files in an fncache

    File names are translated with the encode function given to the
    constructor.
    '''
    def __init__(self, path, opener, pathjoiner, encode):
        self.encode = encode
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        storeop = opener(self.path)
        storeop.createmode = self.createmode
        cache = fncache(storeop)
        self.fncache = cache

        def fncacheopener(path, mode='r', *args, **kw):
            # record every data file that is opened in a non-read mode
            if mode not in ('r', 'rb') and path.startswith('data/'):
                cache.add(path)
            return storeop(self.encode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the path of store file f below self.path'''
        return self.pathjoiner(self.path, self.encode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each existing fncache entry

        If any entry could not be stat'ed, the fncache is rewritten with
        only the existing entries once the iteration has completed.
        '''
        stale = False
        present = []
        joiner = self.pathjoiner
        root = self.path
        for name in self.fncache:
            encoded = self.encode(name)
            try:
                info = os.stat(joiner(root, encoded))
                yield name, encoded, info.st_size
                present.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(present)

    def copylist(self):
        '''return the names to copy; dh and fncache precede the manifest'''
        names = ('data dh fncache'
                 ' 00manifest.d 00manifest.i 00changelog.d 00changelog.i')
        instore = [self.pathjoiner('store', f) for f in names.split()]
        return ['requires', '00changelog.i'] + instore
329
330
def store(requirements, path, opener, pathjoiner=None):
    '''create the store object matching the repository requirements

    - without 'store': a basicstore
    - with 'store' but without 'fncache': an encodedstore
    - with 'store' and 'fncache': a fncachestore using _hybridencode;
      'dotencode' in requirements is passed on to _auxencode

    pathjoiner defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        return basicstore(path, opener, pathjoiner)
    if 'fncache' not in requirements:
        return encodedstore(path, opener, pathjoiner)

    def auxencode(f):
        return _auxencode(f, 'dotencode' in requirements)

    def encode(f):
        return _hybridencode(f, auxencode)

    return fncachestore(path, opener, pathjoiner, encode)
General Comments 0
You need to be logged in to leave comments. Login now