##// END OF EJS Templates
store: use set instead of dict
Benoit Boissinot -
r8467:9890151a default
parent child Browse files
Show More
@@ -1,296 +1,296 b''
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from i18n import _
9 9 import osutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
14 14 def _buildencodefun():
15 15 e = '_'
16 16 win_reserved = [ord(x) for x in '\\:*?"<>|']
17 17 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
18 18 for x in (range(32) + range(126, 256) + win_reserved):
19 19 cmap[chr(x)] = "~%02x" % x
20 20 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
21 21 cmap[chr(x)] = e + chr(x).lower()
22 22 dmap = {}
23 23 for k, v in cmap.iteritems():
24 24 dmap[v] = k
25 25 def decode(s):
26 26 i = 0
27 27 while i < len(s):
28 28 for l in xrange(1, 4):
29 29 try:
30 30 yield dmap[s[i:i+l]]
31 31 i += l
32 32 break
33 33 except KeyError:
34 34 pass
35 35 else:
36 36 raise KeyError
37 37 return (lambda s: "".join([cmap[c] for c in s]),
38 38 lambda s: "".join(list(decode(s))))
39 39
40 40 encodefilename, decodefilename = _buildencodefun()
41 41
42 42 def _build_lower_encodefun():
43 43 win_reserved = [ord(x) for x in '\\:*?"<>|']
44 44 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
45 45 for x in (range(32) + range(126, 256) + win_reserved):
46 46 cmap[chr(x)] = "~%02x" % x
47 47 for x in range(ord("A"), ord("Z")+1):
48 48 cmap[chr(x)] = chr(x).lower()
49 49 return lambda s: "".join([cmap[c] for c in s])
50 50
51 51 lowerencode = _build_lower_encodefun()
52 52
_windows_reserved_filenames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''mask path components that Windows cannot store

    Each '/'-separated component is handled on its own:

    - if the part before the first '.' is one of the names listed in
      _windows_reserved_filenames, the third character of the component
      is replaced by its '~xx' hex escape ('aux' -> 'au~78')
    - a trailing period or space is replaced by its '~xx' hex escape
      ('foo...' -> 'foo..~2e')
    '''
    encoded = []
    for comp in path.split('/'):
        if not comp:
            # NOTE(review): indentation is lost in this view; empty
            # components are assumed to be passed through unchanged
            encoded.append(comp)
            continue
        stem = comp.split('.')[0]
        if stem and stem in _windows_reserved_filenames:
            # encode third letter ('aux' -> 'au~78')
            comp = comp[0:2] + "~%02x" % ord(comp[2]) + comp[3:]
        last = comp[-1]
        if last in '. ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            comp = comp[:-1] + "~%02x" % ord(last)
        encoded.append(comp)
    return '/'.join(encoded)
70 70
71 71 MAX_PATH_LEN_IN_HGSTORE = 120
72 72 DIR_PREFIX_LEN = 8
73 73 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
74 74 def hybridencode(path):
75 75 '''encodes path with a length limit
76 76
77 77 Encodes all paths that begin with 'data/', according to the following.
78 78
79 79 Default encoding (reversible):
80 80
81 81 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
82 82 characters are encoded as '~xx', where xx is the two digit hex code
83 83 of the character (see encodefilename).
84 84 Relevant path components consisting of Windows reserved filenames are
85 85 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
86 86
87 87 Hashed encoding (not reversible):
88 88
89 89 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
90 90 non-reversible hybrid hashing of the path is done instead.
91 91 This encoding uses up to DIR_PREFIX_LEN characters of all directory
92 92 levels of the lowerencoded path, but not more levels than can fit into
93 93 _MAX_SHORTENED_DIRS_LEN.
94 94 Then follows the filler followed by the sha digest of the full path.
95 95 The filler is the beginning of the basename of the lowerencoded path
96 96 (the basename is everything after the last path separator). The filler
97 97 is as long as possible, filling in characters from the basename until
98 98 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
99 99 of the basename have been taken).
100 100 The extension (e.g. '.i' or '.d') is preserved.
101 101
102 102 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
103 103 encoding was used.
104 104 '''
105 105 if not path.startswith('data/'):
106 106 return path
107 107 ndpath = path[len('data/'):]
108 108 res = 'data/' + auxencode(encodefilename(ndpath))
109 109 if len(res) > MAX_PATH_LEN_IN_HGSTORE:
110 110 digest = _sha(path).hexdigest()
111 111 aep = auxencode(lowerencode(ndpath))
112 112 _root, ext = os.path.splitext(aep)
113 113 parts = aep.split('/')
114 114 basename = parts[-1]
115 115 sdirs = []
116 116 for p in parts[:-1]:
117 117 d = p[:DIR_PREFIX_LEN]
118 118 if d[-1] in '. ':
119 119 # Windows can't access dirs ending in period or space
120 120 d = d[:-1] + '_'
121 121 t = '/'.join(sdirs) + '/' + d
122 122 if len(t) > _MAX_SHORTENED_DIRS_LEN:
123 123 break
124 124 sdirs.append(d)
125 125 dirs = '/'.join(sdirs)
126 126 if len(dirs) > 0:
127 127 dirs += '/'
128 128 res = 'dh/' + dirs + digest + ext
129 129 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
130 130 if space_left > 0:
131 131 filler = basename[:space_left]
132 132 res = 'dh/' + dirs + filler + digest + ext
133 133 return res
134 134
135 135 def _calcmode(path):
136 136 try:
137 137 # files in .hg/ will be created using this mode
138 138 mode = os.stat(path).st_mode
139 139 # avoid some useless chmods
140 140 if (0777 & ~util.umask) == (0777 & mode):
141 141 mode = None
142 142 except OSError:
143 143 mode = None
144 144 return mode
145 145
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore:
    '''base class for local repository stores

    Files are kept directly under path and their names are not encoded.
    '''
    def __init__(self, path, opener, pathjoiner):
        '''path is the store directory, opener a factory called with that
        directory, pathjoiner a function joining two path fragments'''
        self.pathjoiner = pathjoiner
        self.path = path
        mode = _calcmode(path)
        self.createmode = mode
        storeopener = opener(path)
        storeopener.createmode = mode
        self.opener = storeopener

    def join(self, f):
        '''return the location of f inside the store'''
        return self.pathjoiner(self.path, f)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed; sub-directories are visited only if recurse is true.
        '''
        join = self.pathjoiner
        top = join(self.path, relpath)
        if not os.path.isdir(top):
            return []
        # number of leading characters to drop to get a store-relative name
        prefixlen = len(self.path) + len(os.sep)
        found = []
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = join(current, name)
                if kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    rel = util.pconvert(full[prefixlen:])
                    found.append((rel, rel, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        found.sort()
        return found

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries found under data/'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        for entry in reversed(self._walk('', False)):
            yield entry

    def copylist(self):
        '''return the names to copy, relative to the directory holding
        the 'requires' file'''
        return ['requires'] + _data.split()
192 192
class encodedstore(basicstore):
    '''store living in the 'store' sub-directory, with file names passed
    through encodefilename'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def encodingopener(f, *args, **kw):
            # callers use unencoded names; encode right before opening
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = encodingopener

    def datafiles(self):
        '''yields (unencoded, encoded, size)

        unencoded is None for files whose name cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the location of f (given unencoded) inside the store'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the names to copy, relative to the directory holding
        the 'requires' file'''
        instore = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + instore
216 216
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb'). Nothing is yielded
    if that raises IOError (the file does not exist yet). Each line is
    yielded without its trailing newline.

    Raises util.Abort on an empty line or on a line that is not
    newline-terminated. The file is closed before the abort is raised.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    for n, line in enumerate(fp):
        if (len(line) < 2) or (line[-1] != '\n'):
            # don't leak the handle on the error path (no try/finally
            # around the yield, to stay compatible with Python 2.4)
            fp.close()
            t = _('invalid entry in fncache, line %s') % (n + 1)
            raise util.Abort(t)
        yield line[:-1]
    fp.close()
230 230
class fncacheopener(object):
    '''opener wrapper that records new data files in the fncache file

    Paths are given unencoded; they are run through hybridencode before
    being handed to the wrapped opener.
    '''
    def __init__(self, opener):
        self.opener = opener
        # set of known fncache entries, loaded on first write access
        self.entries = None

    def loadfncache(self):
        '''(re)load the set of known entries from the fncache file'''
        self.entries = set()
        for f in fncache(self.opener):
            self.entries.add(f)

    def __call__(self, path, mode='r', *args, **kw):
        '''open path through the wrapped opener

        When a path under 'data/' is opened with a mode other than 'r'
        or 'rb' and is not yet known, it is first appended to the
        fncache file.
        '''
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                # close explicitly so that the new entry is flushed
                # without relying on garbage collection of the handle
                fp = self.opener('fncache', 'ab')
                fp.write(path + '\n')
                fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries.add(path)
        return self.opener(hybridencode(path), mode, *args, **kw)
250 250
class fncachestore(basicstore):
    '''store living in the 'store' sub-directory, using hybridencode for
    file names and an fncache file listing the data files'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        # raw opener, used for the fncache file itself
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode
        self._op = rawopener
        self.opener = fncacheopener(rawopener)

    def join(self, f):
        '''return the location of f (given unencoded) inside the store'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for every fncache entry
        whose file exists

        If some entries had no file, the fncache file is rewritten with
        the existing entries only once all entries have been yielded.
        '''
        join = self.pathjoiner
        root = self.path
        alive = []
        stale = False
        for name in fncache(self._op):
            encoded = hybridencode(name)
            try:
                info = os.stat(join(root, encoded))
                yield name, encoded, info.st_size
                alive.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if not stale:
            return
        # rewrite fncache to remove nonexistent entries
        # (may be caused by rollback / strip)
        out = self._op('fncache', mode='wb')
        for name in alive:
            out.write(name + '\n')
        out.close()

    def copylist(self):
        '''return the names to copy, relative to the directory holding
        the 'requires' file'''
        names = (_data + ' dh fncache').split()
        instore = [self.pathjoiner('store', f) for f in names]
        return ['requires', '00changelog.i'] + instore
289 289
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    - 'store' and 'fncache' in requirements: fncachestore
    - 'store' only: encodedstore
    - otherwise: basicstore

    pathjoiner defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storetype = basicstore
    elif 'fncache' in requirements:
        storetype = fncachestore
    else:
        storetype = encodedstore
    return storetype(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now