##// END OF EJS Templates
store: use set instead of dict
Benoit Boissinot -
r8467:9890151a default
parent child Browse files
Show More
@@ -1,296 +1,296
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 from i18n import _
8 from i18n import _
9 import osutil, util
9 import osutil, util
10 import os, stat
10 import os, stat
11
11
12 _sha = util.sha1
12 _sha = util.sha1
13
13
def _buildencodefun():
    '''build the reversible store filename encoder and its decoder

    Returns a pair of functions (encode, decode):

    - encode maps every character of its argument through a fixed table:
      uppercase letters 'X' become '_x', the underscore becomes '__',
      control characters, characters >= 126 and the Windows reserved
      characters (backslash : * ? " < > |) become '~xx' (two hex digits),
      everything else is kept as is.
    - decode reverses encode and raises KeyError if its argument contains
      a sequence that encode can never produce.
    '''
    e = '_'
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    # start from the identity mapping, then override the special cases
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in list(range(32)) + list(range(126, 256)) + win_reserved:
        cmap[chr(x)] = "~%02x" % x
    for x in list(range(ord("A"), ord("Z") + 1)) + [ord(e)]:
        cmap[chr(x)] = e + chr(x).lower()
    # encoded sequences are 1, 2 or 3 characters long and unambiguous
    dmap = dict((v, k) for k, v in cmap.items())
    def decode(s):
        i = 0
        while i < len(s):
            # try the shortest encoded sequence first
            for l in range(1, 4):
                try:
                    yield dmap[s[i:i+l]]
                    i += l
                    break
                except KeyError:
                    pass
            else:
                # nothing of length 1..3 matches: not an encoded name
                raise KeyError(s[i:i+3])
    return (lambda s: "".join([cmap[c] for c in s]),
            lambda s: "".join(decode(s)))

encodefilename, decodefilename = _buildencodefun()
41
41
def _build_lower_encodefun():
    '''build the lossy, lowercasing filename encoder

    The returned function maps every character of its argument through a
    fixed table: uppercase letters are lowercased, control characters,
    characters >= 126 and the Windows reserved characters
    (backslash : * ? " < > |) become '~xx' (two hex digits), everything
    else is kept as is. Unlike encodefilename the result cannot be decoded,
    because 'A' and 'a' produce the same output.
    '''
    win_reserved = [ord(x) for x in '\\:*?"<>|']
    # start from the identity mapping, then override the special cases
    cmap = dict((chr(x), chr(x)) for x in range(127))
    for x in list(range(32)) + list(range(126, 256)) + win_reserved:
        cmap[chr(x)] = "~%02x" % x
    for x in range(ord("A"), ord("Z") + 1):
        cmap[chr(x)] = chr(x).lower()
    return lambda s: "".join([cmap[c] for c in s])

lowerencode = _build_lower_encodefun()
52
52
_windows_reserved_filenames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''mask path components that Windows cannot store

    Each '/'-separated component is handled on its own:

    - if the part before its first '.' is a reserved device name, the
      third character is replaced by its '~xx' code ('aux' -> 'au~78');
    - if it ends in a period or a space, that last character is replaced
      by its '~xx' code ('foo...' -> 'foo..~2e').

    Empty components are passed through unchanged.
    '''
    def mask(name):
        if not name:
            return name
        stem = name.split('.')[0]
        if stem and stem in _windows_reserved_filenames:
            # encode third letter ('aux' -> 'au~78')
            name = name[:2] + ("~%02x" % ord(name[2])) + name[3:]
        last = name[-1]
        if last in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            name = name[:-1] + "~%02x" % ord(last)
        return name
    return '/'.join([mask(part) for part in path.split('/')])
70
70
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encode a store path, bounding the length of the result

    Paths that do not begin with 'data/' are returned untouched.

    Default encoding (reversible):

    The part after 'data/' is run through encodefilename (uppercase 'X'
    -> '_x', reserved or illegal characters -> '~xx') and then through
    auxencode (Windows reserved names masked, 'aux' -> 'au~78'). If
    'data/' plus that result has at most MAX_PATH_LEN_IN_HGSTORE
    characters, it is the answer.

    Hashed encoding (not reversible):

    Otherwise the result is built as 'dh/' + dirs + filler + digest + ext:

    - dirs: the first DIR_PREFIX_LEN characters of every directory level
      of the lowerencoded, auxencoded path, taking levels only as long as
      they fit into _MAX_SHORTENED_DIRS_LEN; a trailing period or space
      of a shortened level is replaced by '_'.
    - digest: the sha digest (hex) of the full, unencoded path.
    - ext: the extension of the lowerencoded path (e.g. '.i' or '.d').
    - filler: as many leading characters of the lowerencoded basename as
      fit before the result reaches MAX_PATH_LEN_IN_HGSTORE characters.
    '''
    if not path.startswith('data/'):
        return path
    ndpath = path[len('data/'):]
    plain = 'data/' + auxencode(encodefilename(ndpath))
    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
        return plain

    # too long: fall back to the hashed form
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(ndpath))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components[-1]
    shortdirs = []
    for comp in components[:-1]:
        prefix = comp[:DIR_PREFIX_LEN]
        if prefix[-1] in '. ':
            # Windows can't access dirs ending in period or space
            prefix = prefix[:-1] + '_'
        candidate = '/'.join(shortdirs) + '/' + prefix
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        shortdirs.append(prefix)
    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'
    head = 'dh/' + dirs
    tail = digest + ext
    # use whatever room is left for the start of the basename
    room = MAX_PATH_LEN_IN_HGSTORE - len(head) - len(tail)
    if room > 0:
        return head + basename[:room] + tail
    return head + tail
134
134
135 def _calcmode(path):
135 def _calcmode(path):
136 try:
136 try:
137 # files in .hg/ will be created using this mode
137 # files in .hg/ will be created using this mode
138 mode = os.stat(path).st_mode
138 mode = os.stat(path).st_mode
139 # avoid some useless chmods
139 # avoid some useless chmods
140 if (0777 & ~util.umask) == (0777 & mode):
140 if (0777 & ~util.umask) == (0777 & mode):
141 mode = None
141 mode = None
142 except OSError:
142 except OSError:
143 mode = None
143 mode = None
144 return mode
144 return mode
145
145
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore:
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        mode = _calcmode(path)
        op = opener(path)
        op.createmode = mode
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = mode
        self.opener = op

    def join(self, f):
        '''return f joined onto the store path'''
        return self.pathjoiner(self.path, f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size)

        Lists the regular files ending in '.d' or '.i' below relpath
        (relative to the store path), descending into subdirectories only
        if recurse is true. Both names of an entry are the same here.
        '''
        join = self.pathjoiner
        top = join(self.path, relpath)
        striplen = len(self.path) + len(os.sep)
        found = []
        pending = []
        if os.path.isdir(top):
            pending.append(top)
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = join(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    rel = util.pconvert(full[striplen:])
                    found.append((rel, rel, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the entries found below 'data' '''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return the names to copy: 'requires' followed by the _data names'''
        return ['requires'] + _data.split()
192
192
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory, file names run through encodefilename'''
    def __init__(self, path, opener, pathjoiner):
        storepath = pathjoiner(path, 'store')
        mode = _calcmode(storepath)
        op = opener(storepath)
        op.createmode = mode
        def encodedopener(f, *args, **kw):
            # callers pass unencoded names; encode them on the way down
            return op(encodefilename(f), *args, **kw)
        self.pathjoiner = pathjoiner
        self.path = storepath
        self.createmode = mode
        self.opener = encodedopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        unencoded is None for a name that decodefilename cannot decode.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the encoded form of f joined onto the store path'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the names to copy, store files prefixed with 'store' '''
        storefiles = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
216
216
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb'); an IOError from that
    call is treated as "no fncache file" and nothing is yielded.

    Every line must be a non-empty name terminated by a newline; the name
    is yielded without the newline. Any other line raises util.Abort
    naming the offending line number. The file is closed both after the
    last entry and before aborting.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    for n, line in enumerate(fp):
        if (len(line) < 2) or (line[-1] != '\n'):
            # release the handle first: the close() after the loop is
            # never reached on this path (done by hand rather than with
            # try/finally, which older interpreters reject around yield)
            fp.close()
            t = _('invalid entry in fncache, line %s') % (n + 1)
            raise util.Abort(t)
        yield line[:-1]
    fp.close()
230
230
class fncacheopener(object):
    '''opener wrapper that records newly written data files in 'fncache'

    Calling the instance opens hybridencode(path) through the wrapped
    opener. When a path below 'data/' is opened in any mode other than
    'r' or 'rb' and is not yet known, it is first appended to the
    'fncache' file.
    '''
    def __init__(self, opener):
        self.opener = opener
        # set of names listed in fncache; None until first needed
        self.entries = None

    def loadfncache(self):
        '''read the fncache file into self.entries

        The set is only assigned once the whole file has been read, so a
        failure while reading leaves self.entries as it was.
        '''
        self.entries = set(fncache(self.opener))

    def __call__(self, path, mode='r', *args, **kw):
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                fp = self.opener('fncache', 'ab')
                try:
                    fp.write(path + '\n')
                finally:
                    # close explicitly instead of relying on the handle
                    # being finalized when it goes out of scope
                    fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries.add(path)
        return self.opener(hybridencode(path), mode, *args, **kw)
250
250
class fncachestore(basicstore):
    '''store in a 'store' subdirectory using hybridencode and an fncache file'''
    def __init__(self, path, opener, pathjoiner):
        storepath = pathjoiner(path, 'store')
        mode = _calcmode(storepath)
        rawop = opener(storepath)
        rawop.createmode = mode
        self.pathjoiner = pathjoiner
        self.path = storepath
        self.createmode = mode
        self._op = rawop
        self.opener = fncacheopener(rawop)

    def join(self, f):
        '''return the hybrid-encoded form of f joined onto the store path'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the fncache entries that exist

        Entries whose encoded file cannot be stat'ed are skipped; if any
        were skipped, the fncache file is rewritten afterwards with the
        remaining entries only.
        '''
        stale = False
        kept = []
        join = self.pathjoiner
        root = self.path
        for name in fncache(self._op):
            encoded = hybridencode(name)
            try:
                st = os.stat(join(root, encoded))
                yield name, encoded, st.st_size
                kept.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if not stale:
            return
        # rewrite fncache to remove nonexistent entries
        # (may be caused by rollback / strip)
        fp = self._op('fncache', mode='wb')
        for name in kept:
            fp.write(name + '\n')
        fp.close()

    def copylist(self):
        '''return the names to copy, including the 'dh' directory and 'fncache' '''
        names = (_data + ' dh fncache').split()
        storefiles = [self.pathjoiner('store', f) for f in names]
        return ['requires', '00changelog.i'] + storefiles
289
289
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, anything else selects basicstore. pathjoiner
    defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        cls = basicstore
    elif 'fncache' in requirements:
        cls = fncachestore
    else:
        cls = encodedstore
    return cls(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now