##// END OF EJS Templates
store: create set directly from iterable
Martin Geisler -
r8480:0e91bcaa default
parent child Browse files
Show More
@@ -1,296 +1,294
1 # store.py - repository store handling for Mercurial
1 # store.py - repository store handling for Mercurial
2 #
2 #
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2, incorporated herein by reference.
6 # GNU General Public License version 2, incorporated herein by reference.
7
7
8 from i18n import _
8 from i18n import _
9 import osutil, util
9 import osutil, util
10 import os, stat
10 import os, stat
11
11
12 _sha = util.sha1
12 _sha = util.sha1
13
13
def _buildencodefun():
    '''Build the reversible filename encoder and its decoder.

    Returns an (encode, decode) pair of functions:

    - encode maps each character through a table: control characters,
      characters from 126 ('~') upwards and the characters Windows
      forbids in file names become '~xx' (two lowercase hex digits),
      an uppercase letter 'X' becomes '_x', '_' itself becomes '__',
      and everything else is kept as is.
    - decode reverses that mapping and raises KeyError when the input
      contains a sequence the encoder can never produce.
    '''
    escape = '_'
    reserved = [ord(ch) for ch in '\\:*?"<>|']
    hexed = list(range(32)) + list(range(126, 256)) + reserved
    folded = list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]

    # later assignments override earlier ones, so the order matters
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    for code in hexed:
        cmap[chr(code)] = "~%02x" % code
    for code in folded:
        cmap[chr(code)] = escape + chr(code).lower()

    # inverse table: encoded token -> original character
    dmap = dict([(token, char) for char, token in cmap.items()])

    def decode(s):
        pos = 0
        while pos < len(s):
            # a token is 1 ('a'), 2 ('_a') or 3 ('~2e') characters long
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                # no token of any width matches at this position
                raise KeyError

    def encode(s):
        return "".join([cmap[c] for c in s])

    def decodeall(s):
        return "".join(list(decode(s)))

    return encode, decodeall

encodefilename, decodefilename = _buildencodefun()
41
41
def _build_lower_encodefun():
    '''Build the lossy, lowercasing filename encoder.

    The returned function maps each character through a table: control
    characters, characters from 126 ('~') upwards and the characters
    Windows forbids in file names become '~xx' (two lowercase hex
    digits), uppercase letters are replaced by their lowercase form,
    and everything else is kept as is.  Because case is folded, the
    result cannot be decoded back.
    '''
    reserved = [ord(ch) for ch in '\\:*?"<>|']
    hexed = list(range(32)) + list(range(126, 256)) + reserved

    # later assignments override earlier ones, so the order matters
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    for code in hexed:
        cmap[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        cmap[chr(code)] = chr(code).lower()

    def encode(s):
        return "".join([cmap[c] for c in s])

    return encode

lowerencode = _build_lower_encodefun()
52
52
# device names Windows reserves; a file or directory whose name (up to
# the first '.') is one of these must be masked before it is stored
_windows_reserved_filenames = '''con prn aux nul
        com1 com2 com3 com4 com5 com6 com7 com8 com9
        lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()

def auxencode(path):
    '''Mask path components that are awkward on Windows.

    Each non-empty '/'-separated component of path is rewritten as
    follows:

    - if the part before its first '.' is a reserved device name, the
      third character is replaced by its '~xx' hex escape
      ('aux' -> 'au~78');
    - if it then ends in a period or a space, that last character is
      replaced by its '~xx' hex escape ('foo...' -> 'foo..~2e').

    Empty components are passed through unchanged.
    '''
    def mask(name):
        if not name:
            return name
        stem = name.split('.')[0]
        if stem and (stem in _windows_reserved_filenames):
            # every reserved name has at least three characters
            name = name[:2] + "~%02x" % ord(name[2]) + name[3:]
        last = name[-1]
        if last in '. ':
            name = name[:-1] + "~%02x" % ord(last)
        return name

    return '/'.join([mask(component) for component in path.split('/')])
70
70
# longest encoded path allowed before the hashed encoding kicks in
MAX_PATH_LEN_IN_HGSTORE = 120
# number of characters kept from each directory level when hashing
DIR_PREFIX_LEN = 8
# upper bound for the joined, shortened directory levels
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4

def hybridencode(path):
    '''encodes path with a length limit

    Only paths that begin with 'data/' are encoded; any other path is
    returned unchanged.

    Default encoding (reversible):

    Uppercase letters 'X' become '_x'.  Reserved or illegal characters
    become '~xx', where xx is the two digit hex code of the character
    (see encodefilename).  Path components that are Windows reserved
    filenames are masked by encoding their third character
    ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    When the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE,
    a non-reversible hybrid hashing of the path is used instead.  It
    keeps up to DIR_PREFIX_LEN characters of each directory level of the
    lowerencoded path, but no more levels than fit into
    _MAX_SHORTENED_DIRS_LEN.  Then comes the filler, followed by the sha
    digest of the full path.  The filler is the beginning of the
    basename of the lowerencoded path (everything after the last path
    separator); it is as long as possible, taking characters from the
    basename until the encoded path has MAX_PATH_LEN_IN_HGSTORE
    characters or the basename is used up.  The extension (e.g. '.i' or
    '.d') is preserved.

    With the hashed encoding the leading 'data/' is replaced by 'dh/'.
    '''
    prefix = 'data/'
    if not path.startswith(prefix):
        return path
    tail = path[len(prefix):]
    plain = prefix + auxencode(encodefilename(tail))
    if len(plain) <= MAX_PATH_LEN_IN_HGSTORE:
        return plain

    # too long for the reversible form: fall back to the hashed layout
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components[-1]

    kept = []
    for component in components[:-1]:
        short = component[:DIR_PREFIX_LEN]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        candidate = '/'.join(kept) + '/' + short
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        kept.append(short)

    dirs = '/'.join(kept)
    if dirs:
        dirs += '/'
    head = 'dh/' + dirs
    suffix = digest + ext
    space_left = MAX_PATH_LEN_IN_HGSTORE - len(head) - len(suffix)
    if space_left > 0:
        # pad with as much of the basename as still fits
        return head + basename[:space_left] + suffix
    return head + suffix
134
134
def _calcmode(path):
    '''Return the mode to create files with under path, or None.

    The mode is taken from os.stat(path).  None is returned when path
    cannot be stat'ed, or when its permission bits already equal what
    the process umask (util.umask) would produce, in which case an
    explicit chmod would be useless.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all nine rwx permission bits
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util.umask) == (permbits & mode):
        return None
    return mode
145
145
# names of the store's top-level directory and revlog files
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore:
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        storeopener = opener(self.path)
        storeopener.createmode = self.createmode
        self.opener = storeopener

    def join(self, f):
        '''returns f joined onto the store path'''
        return self.pathjoiner(self.path, f)

    def _walk(self, relpath, recurse):
        '''returns a sorted list of (unencoded, encoded, size) tuples

        Lists the regular files ending in '.d' or '.i' below relpath
        (taken relative to the store path), descending into
        subdirectories only when recurse is true.  Both names in a
        tuple are the same store-relative path.
        '''
        top = self.pathjoiner(self.path, relpath)
        if not os.path.isdir(top):
            return []
        # length of the store path plus separator, cut from every hit
        striplen = len(self.path) + len(os.sep)
        found = []
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    rel = util.pconvert(full[striplen:])
                    found.append((rel, rel, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''returns the (unencoded, encoded, size) entries below data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''returns the names listed for copying this kind of store'''
        names = ['requires']
        names.extend(_data.split())
        return names
192
192
class encodedstore(basicstore):
    '''store kept in <path>/store, with names run through encodefilename'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def encodedopener(f, *args, **kw):
            # callers pass unencoded names; encode them on the way in
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = encodedopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below data/

        unencoded is None when the name found on disk cannot be decoded.
        '''
        for encoded, ondisk, size in self._walk('data', True):
            try:
                decoded = decodefilename(encoded)
            except KeyError:
                decoded = None
            yield decoded, ondisk, size

    def join(self, f):
        '''returns the encoded form of f joined onto the store path'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''returns the names listed for copying this kind of store'''
        storefiles = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
216
216
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb') to obtain the file.
    Each entry is one line of the file, yielded without its trailing
    newline.  Nothing is yielded when opening fails with IOError.

    Raises util.Abort when a line is empty or lacks its final newline.
    The file is closed both after the last entry and before aborting,
    so an invalid entry no longer leaves the handle open.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    for n, line in enumerate(fp):
        if (len(line) < 2) or (line[-1] != '\n'):
            # release the handle first; nothing more will be read from it
            fp.close()
            t = _('invalid entry in fncache, line %s') % (n + 1)
            raise util.Abort(t)
        yield line[:-1]
    fp.close()
230
230
class fncacheopener(object):
    '''opener wrapper that records new data/ paths in the fncache file

    Calling an instance opens hybridencode(path) through the wrapped
    opener.  Before that, when a path below 'data/' is opened in any
    mode other than 'r' or 'rb' and is not yet known, the path is
    appended to the 'fncache' file.
    '''
    def __init__(self, opener):
        self.opener = opener
        # set of known fncache entries; loaded on first write access
        self.entries = None

    def loadfncache(self):
        '''reads the fncache file into self.entries'''
        self.entries = set(fncache(self.opener))

    def __call__(self, path, mode='r', *args, **kw):
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                fp = self.opener('fncache', 'ab')
                try:
                    fp.write(path + '\n')
                finally:
                    # close explicitly so the entry is flushed right
                    # away instead of whenever the handle is collected
                    fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries.add(path)
        return self.opener(hybridencode(path), mode, *args, **kw)
250
248
class fncachestore(basicstore):
    '''store kept in <path>/store, using hybridencode and the fncache file'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode
        # raw opener, used for the fncache file itself
        self._op = rawopener
        self.opener = fncacheopener(rawopener)

    def join(self, f):
        '''returns the hybrid-encoded form of f joined onto the store path'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry on disk

        Entries whose file cannot be stat'ed are skipped.  If any entry
        was skipped, the fncache file is rewritten with the surviving
        entries once all entries have been yielded.
        '''
        storepath = self.path
        joinpath = self.pathjoiner
        live = []
        stale = False
        for name in fncache(self._op):
            encoded = hybridencode(name)
            try:
                size = os.stat(joinpath(storepath, encoded)).st_size
            except OSError:
                # nonexistent entry
                stale = True
                continue
            yield name, encoded, size
            live.append(name)
        if not stale:
            return
        # rewrite fncache to remove nonexistent entries
        # (may be caused by rollback / strip)
        out = self._op('fncache', mode='wb')
        for name in live:
            out.write(name + '\n')
        out.close()

    def copylist(self):
        '''returns the names listed for copying this kind of store'''
        names = (_data + ' dh fncache').split()
        storefiles = [self.pathjoiner('store', f) for f in names]
        return ['requires', '00changelog.i'] + storefiles
289
287
def store(requirements, path, opener, pathjoiner=None):
    '''Return the store object matching the repository requirements.

    'store' together with 'fncache' in requirements selects
    fncachestore, 'store' alone selects encodedstore, and anything else
    selects basicstore.  pathjoiner falls back to os.path.join when it
    is not given.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        return basicstore(path, opener, pathjoiner)
    if 'fncache' in requirements:
        return fncachestore(path, opener, pathjoiner)
    return encodedstore(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now