##// END OF EJS Templates
store: skip decodedir check if path does not contain '.hg/'...
Nicolas Dumazet -
r11790:ba9957bc default
parent child Browse files
Show More
@@ -1,333 +1,333
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from i18n import _
9 9 import osutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
# This avoids a collision between a file named foo and a dir named
# foo.i or foo.d
def encodedir(path):
    '''Escape directory names under 'data/' that end in .hg, .i or .d.

    Each '.hg/', '.i/' and '.d/' found in the path gets '.hg' inserted
    before the slash ('foo.i/' -> 'foo.i.hg/').  Paths that do not start
    with 'data/' are returned unchanged.
    '''
    if path.startswith('data/'):
        # '.hg/' has to be handled first, exactly as in the chained form:
        # later passes introduce new '.hg/' sequences that must stay as is
        for suffix in ('.hg/', '.i/', '.d/'):
            path = path.replace(suffix, suffix[:-1] + '.hg/')
    return path
23 23
def decodedir(path):
    '''Reverse encodedir(): drop the '.hg' escape from directory names.

    'foo.d.hg/' becomes 'foo.d/', 'foo.i.hg/' becomes 'foo.i/' and
    'foo.hg.hg/' becomes 'foo.hg/'.  Paths that do not start with 'data/'
    are returned unchanged.
    '''
    # Every escaped directory name ends in '.hg/', so a path without that
    # substring has nothing to decode and the replace() passes are skipped.
    if not path.startswith('data/') or '.hg/' not in path:
        return path
    return (path
            .replace(".d.hg/", ".d/")
            .replace(".i.hg/", ".i/")
            .replace(".hg.hg/", ".hg/"))
31 31
def _buildencodefun():
    '''Build the reversible filename codec; returns (encode, decode).

    Encoding first applies encodedir(), then maps each character:
    - control characters (< 32), characters >= 126 and the Windows
      reserved characters \\ : * ? " < > | become '~xx' (two hex digits)
    - uppercase letters and '_' become '_' followed by the lowercase
      character ('A' -> '_a', '_' -> '__')
    - everything else is kept as is

    Decoding reverses the character mapping and then applies decodedir().
    It raises KeyError when the input contains a sequence that is not a
    valid encoding.
    '''
    escape = '_'
    reserved = [ord(ch) for ch in '\\:*?"<>|']

    encmap = {}
    for code in range(127):
        encmap[chr(code)] = chr(code)
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        encmap[chr(code)] = "~%02x" % code
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        encmap[chr(code)] = escape + chr(code).lower()

    # encoded forms are unique, so the mapping can simply be inverted
    decmap = dict([(enc, plain) for plain, enc in encmap.items()])

    def decode(s):
        pos = 0
        end = len(s)
        while pos < end:
            # an encoded token is 1 ('a'), 2 ('_a') or 3 ('~7e') chars long
            for width in (1, 2, 3):
                try:
                    yield decmap[s[pos:pos + width]]
                    pos += width
                    break
                except KeyError:
                    pass
            else:
                raise KeyError

    def encodename(s):
        return "".join([encmap[c] for c in encodedir(s)])

    def decodename(s):
        return decodedir("".join(list(decode(s))))

    return encodename, decodename

encodefilename, decodefilename = _buildencodefun()
59 59
def _build_lower_encodefun():
    '''Build the non-reversible, case-folding filename encoder.

    The returned function maps control characters (< 32), characters
    >= 126 and the Windows reserved characters \\ : * ? " < > | to '~xx'
    (two hex digits), lowercases 'A'-'Z' and keeps everything else.
    '''
    reserved = [ord(ch) for ch in '\\:*?"<>|']

    table = {}
    for code in range(127):
        table[chr(code)] = chr(code)
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        table[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def encode(s):
        return "".join([table[c] for c in s])

    return encode

lowerencode = _build_lower_encodefun()
70 70
_windows_reserved_filenames = '''con prn aux nul
    com1 com2 com3 com4 com5 com6 com7 com8 com9
    lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''Mask path components that Windows cannot store.

    For every non-empty '/'-separated component:
    - if the part before the first '.' is a Windows reserved device name,
      the third character is replaced by its '~xx' hex escape
      ('aux' -> 'au~78')
    - a trailing period or space is replaced by its '~xx' hex escape
      ('foo...' -> 'foo..~2e')
    '''
    encoded = []
    for name in path.split('/'):
        if not name:
            encoded.append(name)
            continue
        stem = name.split('.', 1)[0]
        if stem in _windows_reserved_filenames:
            # reserved names are all at least three characters long
            name = '%s~%02x%s' % (name[:2], ord(name[2]), name[3:])
        if name[-1] in '. ':
            name = '%s~%02x' % (name[:-1], ord(name[-1]))
        encoded.append(name)
    return '/'.join(encoded)
88 88
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encodes path with a length limit

    Only paths that begin with 'data/' are encoded; any other path is
    returned unchanged.

    Default encoding (reversible):

    Uppercase letters 'X' become '_x' and reserved or illegal characters
    become '~xx', where xx is the two digit hex code of the character
    (see encodefilename).  Path components that are Windows reserved
    filenames are masked by encoding their third character
    ('aux' -> 'au~78', see auxencode).

    Hashed encoding (not reversible):

    Used when the default-encoded path is longer than
    MAX_PATH_LEN_IN_HGSTORE.  The result is built from:
    - 'dh/' in place of the leading 'data/'
    - up to DIR_PREFIX_LEN characters of each directory level of the
      lowerencoded path, for as many levels as fit into
      _MAX_SHORTENED_DIRS_LEN
    - a filler: the beginning of the basename of the lowerencoded path
      (everything after the last path separator), as long as possible
      without exceeding MAX_PATH_LEN_IN_HGSTORE characters in total
    - the sha digest of the full path
    - the extension (e.g. '.i' or '.d'), which is preserved
    '''
    if not path.startswith('data/'):
        return path
    # escape directories ending with .i and .d
    path = encodedir(path)
    tail = path[len('data/'):]
    default = 'data/' + auxencode(encodefilename(tail))
    if len(default) <= MAX_PATH_LEN_IN_HGSTORE:
        return default

    # too long: fall back to the hashed form
    digest = _sha(path).hexdigest()
    lowered = auxencode(lowerencode(tail))
    ext = os.path.splitext(lowered)[1]
    components = lowered.split('/')
    basename = components[-1]

    shortdirs = []
    for component in components[:-1]:
        short = component[:DIR_PREFIX_LEN]
        if short[-1] in '. ':
            # Windows can't access dirs ending in period or space
            short = short[:-1] + '_'
        # length of the directory part if this level were added
        candidate = '/'.join(shortdirs) + '/' + short
        if len(candidate) > _MAX_SHORTENED_DIRS_LEN:
            break
        shortdirs.append(short)

    dirs = '/'.join(shortdirs)
    if dirs:
        dirs += '/'

    # whatever room the mandatory parts leave is filled from the basename
    room = MAX_PATH_LEN_IN_HGSTORE - len('dh/' + dirs + digest + ext)
    filler = ''
    if room > 0:
        filler = basename[:room]
    return 'dh/' + dirs + filler + digest + ext
154 154
def _calcmode(path):
    '''Return the mode to create files with under path, or None.

    None is returned when path cannot be stat'ed, or when its permission
    bits already equal what the current umask would produce, so that
    useless chmods are avoided.
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all rwx bits for user, group and other (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    if (permbits & mode) == (permbits & ~util.umask):
        return None
    return mode
165 165
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore(object):
    '''base class for local repository stores'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        self.createmode = _calcmode(path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def diropener(f, *args, **kw):
            # only directory escaping is applied to names in this store
            return rawopener(encodedir(f), *args, **kw)
        self.opener = diropener

    def join(self, f):
        '''return the path of f inside the store, with encodedir applied'''
        encoded = encodedir(f)
        return self.pathjoiner(self.path, encoded)

    def _walk(self, relpath, recurse):
        '''return a sorted list of (unencoded, encoded, size) tuples

        Only regular files ending in '.d' or '.i' below relpath are
        listed; subdirectories are visited only if recurse is true.
        '''
        top = self.pathjoiner(self.path, relpath)
        found = []
        if not os.path.isdir(top):
            return found
        # number of leading characters to strip to get a store-relative name
        prefixlen = len(self.path) + len(os.sep)
        pending = [top]
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                if kind == stat.S_IFDIR:
                    if recurse:
                        pending.append(full)
                elif kind == stat.S_IFREG and name[-2:] in ('.d', '.i'):
                    encoded = util.pconvert(full[prefixlen:])
                    found.append((decodedir(encoded), encoded, st.st_size))
        found.sort()
        return found

    def datafiles(self):
        '''return the (unencoded, encoded, size) entries below 'data' '''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # yield data files first
        for entry in self.datafiles():
            yield entry
        # yield manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''return the names of the files and directories to copy'''
        names = ['requires']
        names.extend(_data.split())
        return names
213 213
class encodedstore(basicstore):
    '''store kept in a 'store' subdirectory, using encodefilename names'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode

        def encodingopener(f, *args, **kw):
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = encodingopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for files below 'data'

        unencoded is None when the on-disk name cannot be decoded.
        '''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the path of f inside the store, with its name encoded'''
        encoded = encodefilename(f)
        return self.pathjoiner(self.path, encoded)

    def copylist(self):
        '''return the names of the files and directories to copy'''
        names = ['requires', '00changelog.i']
        for f in _data.split():
            names.append(self.pathjoiner('store', f))
        return names
237 237
class fncache(object):
    '''lazily loaded set of the names recorded in the 'fncache' file

    The file holds one name per line, each terminated by a newline.
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        self.opener = opener
        # None until the file has been read (or rewritten)
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A file that cannot be opened is treated as empty.  Raises
        util.Abort on an empty line or a line lacking its newline; in that
        case entries is left untouched so that no partial state is kept.
        '''
        entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            self.entries = entries
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                entries.add(decodedir(line[:-1]))
        finally:
            # close the file even when aborting on a bad line
            fp.close()
        self.entries = entries

    def rewrite(self, files):
        '''replace the content of the fncache file with files'''
        # files may be a one-shot iterable and is needed twice below
        files = list(files)
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''record fn, appending it to the fncache file if it is new'''
        if self.entries is None:
            self._load()
        if fn not in self.entries:
            fp = self.opener('fncache', 'ab')
            try:
                fp.write(encodedir(fn) + '\n')
            finally:
                # do not rely on garbage collection to flush and close
                fp.close()
            self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
283 283
class fncachestore(basicstore):
    '''store using hybridencode names, tracked in an fncache'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode
        fnc = fncache(op)
        self.fncache = fnc

        def fncacheopener(path, mode='r', *args, **kw):
            # record data files opened in any mode other than 'r'/'rb'
            writing = mode not in ('r', 'rb')
            if writing and path.startswith('data/'):
                fnc.add(path)
            return op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the path of f inside the store, hybrid-encoded'''
        encoded = hybridencode(f)
        return self.pathjoiner(self.path, encoded)

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry on disk

        Entries whose file cannot be stat'ed are skipped; if there were
        any, the fncache is rewritten without them once iteration ends.
        '''
        stale = False
        present = []
        join = self.pathjoiner
        root = self.path
        for name in self.fncache:
            encoded = hybridencode(name)
            try:
                size = os.stat(join(root, encoded)).st_size
                yield name, encoded, size
                present.append(name)
            except OSError:
                # nonexistent entry
                stale = True
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(present)

    def copylist(self):
        '''return the names of the files and directories to copy'''
        names = ['requires', '00changelog.i']
        for f in (_data + ' dh fncache').split():
            names.append(self.pathjoiner('store', f))
        return names
326 326
def store(requirements, path, opener, pathjoiner=None):
    '''Return the store object matching the repository requirements.

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, and anything else selects basicstore.
    pathjoiner defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storecls = basicstore
    elif 'fncache' in requirements:
        storecls = fncachestore
    else:
        storecls = encodedstore
    return storecls(path, opener, pathjoiner)
General Comments 0
You need to be logged in to leave comments. Login now