##// END OF EJS Templates
store encoding: .i/.d encoding for non-store repo (broken by 810387f59696)
Benoit Boissinot -
r8633:c31fe74a default
parent child Browse files
Show More
@@ -1,331 +1,332
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from i18n import _
9 9 import osutil, util
10 10 import os, stat
11 11
12 12 _sha = util.sha1
13 13
14 14 # This avoids a collision between a file named foo and a dir named
15 15 # foo.i or foo.d
def encodedir(path):
    '''escape directory names ending in .hg, .i or .d below data/

    Paths that do not start with 'data/' are returned untouched. For the
    others, every '<suffix>/' occurrence becomes '<suffix>.hg/', so that a
    directory called foo.i can never clash with the revlog file foo.i.
    '''
    if path.startswith('data/'):
        # '.hg/' has to be handled first: the two later substitutions
        # introduce fresh '.hg/' sequences that must not be escaped again
        for suffix in ('.hg', '.i', '.d'):
            path = path.replace(suffix + '/', suffix + '.hg/')
    return path
23 23
def decodedir(path):
    '''undo encodedir: strip the '.hg' escape from directory names

    Paths that do not start with 'data/' are returned untouched.
    '''
    if path.startswith('data/'):
        # mirror image of encodedir, hence '.hg' is handled last
        for suffix in ('.d', '.i', '.hg'):
            path = path.replace(suffix + '.hg/', suffix + '/')
    return path
31 31
def _buildencodefun():
    '''build the reversible (encode, decode) pair for store file names

    Encoding first applies encodedir, then maps every character through a
    table: uppercase letters and '_' become '_' + lowercase letter, control
    characters, '~', bytes >= 126 and Windows reserved characters become
    '~xx' (two hex digits), everything else is kept.

    Decoding reverses the table and finally applies decodedir; it raises
    KeyError when the input contains a sequence that is not valid output
    of the encoder.
    '''
    escape = '_'
    win_reserved = [ord(c) for c in '\\:*?"<>|']

    # start from the identity mapping for plain ASCII
    cmap = {}
    for code in range(127):
        cmap[chr(code)] = chr(code)
    # characters that get hex-escaped
    for code in list(range(32)) + list(range(126, 256)) + win_reserved:
        cmap[chr(code)] = "~%02x" % code
    # characters that get prefixed with the escape character
    for code in list(range(ord("A"), ord("Z") + 1)) + [ord(escape)]:
        cmap[chr(code)] = escape + chr(code).lower()

    # every encoded token is unique, so the table can simply be inverted
    dmap = dict([(token, char) for char, token in cmap.items()])

    def tokens(s):
        # encoded tokens are 1 to 3 characters long; try the shortest first
        pos = 0
        end = len(s)
        while pos < end:
            for width in (1, 2, 3):
                token = s[pos:pos + width]
                if token in dmap:
                    yield dmap[token]
                    pos += width
                    break
            else:
                raise KeyError

    def encode(s):
        return "".join([cmap[c] for c in encodedir(s)])

    def decode(s):
        return decodedir("".join(list(tokens(s))))

    return encode, decode

encodefilename, decodefilename = _buildencodefun()
59 59
60 60 def _build_lower_encodefun():
61 61 win_reserved = [ord(x) for x in '\\:*?"<>|']
62 62 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
63 63 for x in (range(32) + range(126, 256) + win_reserved):
64 64 cmap[chr(x)] = "~%02x" % x
65 65 for x in range(ord("A"), ord("Z")+1):
66 66 cmap[chr(x)] = chr(x).lower()
67 67 return lambda s: "".join([cmap[c] for c in s])
68 68
69 69 lowerencode = _build_lower_encodefun()
70 70
_windows_reserved_filenames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''mask path components that Windows cannot handle

    Works on each '/'-separated component independently:
    - if the part before the first '.' is a Windows reserved device name,
      its third character is hex-escaped ('aux' -> 'au~78')
    - a trailing period or space is hex-escaped ('foo...' -> 'foo..~2e')
    Empty components are passed through unchanged.
    '''
    def mask(name):
        if not name:
            return name
        stem = name.split('.')[0]
        if stem and stem in _windows_reserved_filenames:
            # every reserved name has at least three characters
            name = name[:2] + "~%02x" % ord(name[2]) + name[3:]
        last = name[-1]
        if last in '. ':
            name = name[:-1] + "~%02x" % ord(last)
        return name

    return '/'.join([mask(component) for component in path.split('/')])
88 88
89 89 MAX_PATH_LEN_IN_HGSTORE = 120
90 90 DIR_PREFIX_LEN = 8
91 91 _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
92 92 def hybridencode(path):
93 93 '''encodes path with a length limit
94 94
95 95 Encodes all paths that begin with 'data/', according to the following.
96 96
97 97 Default encoding (reversible):
98 98
99 99 Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
100 100 characters are encoded as '~xx', where xx is the two digit hex code
101 101 of the character (see encodefilename).
102 102 Relevant path components consisting of Windows reserved filenames are
103 103 masked by encoding the third character ('aux' -> 'au~78', see auxencode).
104 104
105 105 Hashed encoding (not reversible):
106 106
107 107 If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
108 108 non-reversible hybrid hashing of the path is done instead.
109 109 This encoding uses up to DIR_PREFIX_LEN characters of all directory
110 110 levels of the lowerencoded path, but not more levels than can fit into
111 111 _MAX_SHORTENED_DIRS_LEN.
112 112 Then follows the filler followed by the sha digest of the full path.
113 113 The filler is the beginning of the basename of the lowerencoded path
114 114 (the basename is everything after the last path separator). The filler
115 115 is as long as possible, filling in characters from the basename until
116 116 the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
117 117 of the basename have been taken).
118 118 The extension (e.g. '.i' or '.d') is preserved.
119 119
120 120 The string 'data/' at the beginning is replaced with 'dh/', if the hashed
121 121 encoding was used.
122 122 '''
123 123 if not path.startswith('data/'):
124 124 return path
125 125 # escape directories ending with .i and .d
126 126 path = encodedir(path)
127 127 ndpath = path[len('data/'):]
128 128 res = 'data/' + auxencode(encodefilename(ndpath))
129 129 if len(res) > MAX_PATH_LEN_IN_HGSTORE:
130 130 digest = _sha(path).hexdigest()
131 131 aep = auxencode(lowerencode(ndpath))
132 132 _root, ext = os.path.splitext(aep)
133 133 parts = aep.split('/')
134 134 basename = parts[-1]
135 135 sdirs = []
136 136 for p in parts[:-1]:
137 137 d = p[:DIR_PREFIX_LEN]
138 138 if d[-1] in '. ':
139 139 # Windows can't access dirs ending in period or space
140 140 d = d[:-1] + '_'
141 141 t = '/'.join(sdirs) + '/' + d
142 142 if len(t) > _MAX_SHORTENED_DIRS_LEN:
143 143 break
144 144 sdirs.append(d)
145 145 dirs = '/'.join(sdirs)
146 146 if len(dirs) > 0:
147 147 dirs += '/'
148 148 res = 'dh/' + dirs + digest + ext
149 149 space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
150 150 if space_left > 0:
151 151 filler = basename[:space_left]
152 152 res = 'dh/' + dirs + filler + digest + ext
153 153 return res
154 154
155 155 def _calcmode(path):
156 156 try:
157 157 # files in .hg/ will be created using this mode
158 158 mode = os.stat(path).st_mode
159 159 # avoid some useless chmods
160 160 if (0777 & ~util.umask) == (0777 & mode):
161 161 mode = None
162 162 except OSError:
163 163 mode = None
164 164 return mode
165 165
166 166 _data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'
167 167
168 168 class basicstore:
169 169 '''base class for local repository stores'''
170 170 def __init__(self, path, opener, pathjoiner):
171 171 self.pathjoiner = pathjoiner
172 172 self.path = path
173 173 self.createmode = _calcmode(path)
174 self.opener = opener(self.path)
175 self.opener.createmode = self.createmode
174 op = opener(self.path)
175 op.createmode = self.createmode
176 self.opener = lambda f, *args, **kw: op(encodedir(f), *args, **kw)
176 177
177 178 def join(self, f):
178 179 return self.pathjoiner(self.path, encodedir(f))
179 180
180 181 def _walk(self, relpath, recurse):
181 182 '''yields (unencoded, encoded, size)'''
182 183 path = self.pathjoiner(self.path, relpath)
183 184 striplen = len(self.path) + len(os.sep)
184 185 l = []
185 186 if os.path.isdir(path):
186 187 visit = [path]
187 188 while visit:
188 189 p = visit.pop()
189 190 for f, kind, st in osutil.listdir(p, stat=True):
190 191 fp = self.pathjoiner(p, f)
191 192 if kind == stat.S_IFREG and f[-2:] in ('.d', '.i'):
192 193 n = util.pconvert(fp[striplen:])
193 194 l.append((decodedir(n), n, st.st_size))
194 195 elif kind == stat.S_IFDIR and recurse:
195 196 visit.append(fp)
196 197 return sorted(l)
197 198
198 199 def datafiles(self):
199 200 return self._walk('data', True)
200 201
201 202 def walk(self):
202 203 '''yields (unencoded, encoded, size)'''
203 204 # yield data files first
204 205 for x in self.datafiles():
205 206 yield x
206 207 # yield manifest before changelog
207 208 for x in reversed(self._walk('', False)):
208 209 yield x
209 210
210 211 def copylist(self):
211 212 return ['requires'] + _data.split()
212 213
class encodedstore(basicstore):
    '''store kept in <path>/store, with names passed through
    encodefilename'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        rawopener = opener(self.path)
        rawopener.createmode = self.createmode
        def encodingopener(f, *args, **kw):
            return rawopener(encodefilename(f), *args, **kw)
        self.opener = encodingopener

    def datafiles(self):
        '''yields (unencoded, encoded, size); unencoded is None for files
        whose name is not valid encodefilename output'''
        for name, encoded, size in self._walk('data', True):
            try:
                name = decodefilename(name)
            except KeyError:
                name = None
            yield name, encoded, size

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''return the names, relative to the store's parent directory,
        that have to be copied along with this store'''
        storefiles = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + storefiles
236 237
class fncache(object):
    '''in-memory view of the 'fncache' file listing the store's data files

    The file holds one name per line. Entries are loaded lazily, on the
    first call that needs them, into the set self.entries (None until
    then).
    '''
    # the filename used to be partially encoded
    # hence the encodedir/decodedir dance
    def __init__(self, opener):
        '''opener is called as opener(name, mode) and must return a file
        object for the named file'''
        self.opener = opener
        self.entries = None

    def _load(self):
        '''fill the entries from the fncache file

        A missing file yields an empty set. Raises util.Abort when a line
        is empty or lacks its trailing newline.
        '''
        self.entries = set()
        try:
            fp = self.opener('fncache', mode='rb')
        except IOError:
            # skip nonexistent file
            return
        try:
            for n, line in enumerate(fp):
                if (len(line) < 2) or (line[-1] != '\n'):
                    t = _('invalid entry in fncache, line %s') % (n + 1)
                    raise util.Abort(t)
                self.entries.add(decodedir(line[:-1]))
        finally:
            # do not leak the handle when aborting on a bad line
            fp.close()

    def rewrite(self, files):
        '''replace the content of the fncache file with the given names'''
        # files is walked twice below; a one-shot iterable would otherwise
        # leave self.entries empty
        files = list(files)
        fp = self.opener('fncache', mode='wb')
        try:
            for p in files:
                fp.write(encodedir(p) + '\n')
        finally:
            fp.close()
        self.entries = set(files)

    def add(self, fn):
        '''append fn to the fncache file unless it is already listed'''
        if self.entries is None:
            self._load()
        if fn in self.entries:
            return
        fp = self.opener('fncache', 'ab')
        try:
            fp.write(encodedir(fn) + '\n')
        finally:
            fp.close()
        # keep the in-memory set in sync, otherwise __contains__ keeps
        # answering False and the same name gets appended again
        self.entries.add(fn)

    def __contains__(self, fn):
        if self.entries is None:
            self._load()
        return fn in self.entries

    def __iter__(self):
        if self.entries is None:
            self._load()
        return iter(self.entries)
280 281
class fncachestore(basicstore):
    '''store kept in <path>/store, with names passed through hybridencode
    and every data file recorded in the fncache file'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        self._op = opener(self.path)
        self._op.createmode = self.createmode
        self.fncache = fncache(self._op)

        def fncacheopener(path, mode='r', *args, **kw):
            # any non-read access to a data file may create it, so make
            # sure it is listed in the fncache
            writing = mode not in ('r', 'rb')
            if writing and path.startswith('data/'):
                if path not in self.fncache:
                    self.fncache.add(path)
            return self._op(hybridencode(path), mode, *args, **kw)
        self.opener = fncacheopener

    def join(self, f):
        '''return the on-disk path of store file f'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for each fncache entry that
        exists on disk, then drops the missing ones from the fncache'''
        join = self.pathjoiner
        root = self.path
        stale = False
        alive = []
        for name in self.fncache:
            encoded = hybridencode(name)
            try:
                st = os.stat(join(root, encoded))
            except OSError:
                # nonexistent entry
                stale = True
                continue
            yield name, encoded, st.st_size
            alive.append(name)
        if stale:
            # rewrite fncache to remove nonexistent entries
            # (may be caused by rollback / strip)
            self.fncache.rewrite(alive)

    def copylist(self):
        '''return the names, relative to the store's parent directory,
        that have to be copied along with this store'''
        names = (_data + ' dh fncache').split()
        storefiles = [self.pathjoiner('store', f) for f in names]
        return ['requires', '00changelog.i'] + storefiles
324 325
def store(requirements, path, opener, pathjoiner=None):
    '''return the store object matching the repository requirements

    'store' together with 'fncache' selects fncachestore, 'store' alone
    selects encodedstore, and basicstore is used otherwise. pathjoiner
    defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storeclass = basicstore
    elif 'fncache' in requirements:
        storeclass = fncachestore
    else:
        storeclass = encodedstore
    return storeclass(path, opener, pathjoiner)
@@ -1,51 +1,70
1 1 #!/bin/sh
# Checks how revlog paths are recorded in .hg/store/fncache and how
# directories named like revlog files (a.i, tst.d, ...) are escaped on disk,
# for an fncache repo, a non-store repo and a store repo without fncache.
# Every echo below is part of the expected output, so keep them unchanged.
2 2
3 3 echo "% init repo1"
4 4 hg init repo1
5 5 cd repo1
6 6
7 7 echo
8 8 echo "% add a; ci"
9 9 echo "some text" > a
10 10 hg add
11 11 hg ci -m first
12 12
13 13 echo
14 14 echo "% cat .hg/store/fncache"
15 15 cat .hg/store/fncache
16 16
# a directory named like the revlog of file 'a' must not collide with it
17 17 echo
18 18 echo "% add a.i/b; ci"
19 19 mkdir a.i
20 20 echo "some other text" > a.i/b
21 21 hg add
22 22 hg ci -m second
23 23
24 24 echo
25 25 echo "% cat .hg/store/fncache"
26 26 cat .hg/store/fncache
27 27
# a directory whose name already looks escaped gets escaped once more
28 28 echo
29 29 echo "% add a.i.hg/c; ci"
30 30 mkdir a.i.hg
31 31 echo "yet another text" > a.i.hg/c
32 32 hg add
33 33 hg ci -m third
34 34
35 35 echo
36 36 echo "% cat .hg/store/fncache"
37 37 cat .hg/store/fncache
38 38
39 39 echo
40 40 echo "% hg verify"
41 41 hg verify
42 42
# without the fncache file, verify is expected to report missing revlogs
43 43 echo
44 44 echo "% rm .hg/store/fncache"
45 45 rm .hg/store/fncache
46 46
47 47 echo
48 48 echo "% hg verify"
49 49 hg verify
50 50
51 # try non store repo encoding
52 cd ..
53 echo % non store repo
54 hg --config format.usestore=False init foo
55 cd foo
56 mkdir tst.d
57 echo foo > tst.d/foo
58 hg ci -Amfoo
59 ls -R .hg
60
# store repo without fncache: directory escaping plus uppercase escaping
61 cd ..
62 echo % non fncache repo
63 hg --config format.usefncache=False init bar
64 cd bar
65 mkdir tst.d
66 echo foo > tst.d/Foo
67 hg ci -Amfoo
68 ls -R .hg
69
51 70 exit 0
@@ -1,43 +1,81
1 1 % init repo1
2 2
3 3 % add a; ci
4 4 adding a
5 5
6 6 % cat .hg/store/fncache
7 7 data/a.i
8 8
9 9 % add a.i/b; ci
10 10 adding a.i/b
11 11
12 12 % cat .hg/store/fncache
13 13 data/a.i
14 14 data/a.i.hg/b.i
15 15
16 16 % add a.i.hg/c; ci
17 17 adding a.i.hg/c
18 18
19 19 % cat .hg/store/fncache
20 20 data/a.i
21 21 data/a.i.hg/b.i
22 22 data/a.i.hg.hg/c.i
23 23
24 24 % hg verify
25 25 checking changesets
26 26 checking manifests
27 27 crosschecking files in changesets and manifests
28 28 checking files
29 29 3 files, 3 changesets, 3 total revisions
30 30
31 31 % rm .hg/store/fncache
32 32
33 33 % hg verify
34 34 checking changesets
35 35 checking manifests
36 36 crosschecking files in changesets and manifests
37 37 checking files
38 38 data/a.i@0: missing revlog!
39 39 data/a.i.hg/c.i@2: missing revlog!
40 40 data/a.i/b.i@1: missing revlog!
41 41 3 files, 3 changesets, 3 total revisions
42 42 3 integrity errors encountered!
43 43 (first damaged changeset appears to be 0)
44 % non store repo
45 adding tst.d/foo
46 .hg:
47 00changelog.i
48 00manifest.i
49 data
50 dirstate
51 requires
52 undo
53 undo.branch
54 undo.dirstate
55
56 .hg/data:
57 tst.d.hg
58
59 .hg/data/tst.d.hg:
60 foo.i
61 % non fncache repo
62 adding tst.d/Foo
63 .hg:
64 00changelog.i
65 dirstate
66 requires
67 store
68 undo.branch
69 undo.dirstate
70
71 .hg/store:
72 00changelog.i
73 00manifest.i
74 data
75 undo
76
77 .hg/store/data:
78 tst.d.hg
79
80 .hg/store/data/tst.d.hg:
81 _foo.i
General Comments 0
You need to be logged in to leave comments. Login now