##// END OF EJS Templates
store: encode trailing period and space on directory names (issue1417)...
Adrian Buehlmann -
r7515:ee5aba88 default
parent child Browse files
Show More
@@ -1,294 +1,297
1 1 # store.py - repository store handling for Mercurial
2 2 #
3 3 # Copyright 2008 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms
6 6 # of the GNU General Public License, incorporated herein by reference.
7 7
8 8 from i18n import _
9 9 import os, stat, osutil, util
10 10
11 11 _sha = util.sha1
12 12
13 13 def _buildencodefun():
14 14 e = '_'
15 15 win_reserved = [ord(x) for x in '\\:*?"<>|']
16 16 cmap = dict([ (chr(x), chr(x)) for x in xrange(127) ])
17 17 for x in (range(32) + range(126, 256) + win_reserved):
18 18 cmap[chr(x)] = "~%02x" % x
19 19 for x in range(ord("A"), ord("Z")+1) + [ord(e)]:
20 20 cmap[chr(x)] = e + chr(x).lower()
21 21 dmap = {}
22 22 for k, v in cmap.iteritems():
23 23 dmap[v] = k
24 24 def decode(s):
25 25 i = 0
26 26 while i < len(s):
27 27 for l in xrange(1, 4):
28 28 try:
29 29 yield dmap[s[i:i+l]]
30 30 i += l
31 31 break
32 32 except KeyError:
33 33 pass
34 34 else:
35 35 raise KeyError
36 36 return (lambda s: "".join([cmap[c] for c in s]),
37 37 lambda s: "".join(list(decode(s))))
38 38
39 39 encodefilename, decodefilename = _buildencodefun()
40 40
def _build_lower_encodefun():
    '''build the lowercasing filename encoder (not reversible)

    The returned function encodes control characters, characters from
    '~' upwards and the Windows reserved characters as '~xx' (two digit
    hex code) and folds uppercase letters to lowercase.
    '''
    reserved = [ord(c) for c in '\\:*?"<>|']

    table = {}
    for code in range(127):
        table[chr(code)] = chr(code)
    for code in list(range(32)) + list(range(126, 256)) + reserved:
        table[chr(code)] = "~%02x" % code
    for code in range(ord("A"), ord("Z") + 1):
        table[chr(code)] = chr(code).lower()

    def lower_encode(s):
        return "".join([table[c] for c in s])

    return lower_encode

lowerencode = _build_lower_encodefun()
51 51
_windows_reserved_filenames = '''con prn aux nul
com1 com2 com3 com4 com5 com6 com7 com8 com9
lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split()
def auxencode(path):
    '''masks path components that are troublesome on Windows

    For every '/'-separated component of path:
    - if the part before the first '.' is a Windows reserved filename,
      the third character is encoded ('aux' -> 'au~78')
    - a trailing period or space is encoded ('foo...' -> 'foo..~2e')
    Empty components are kept, so all slashes are preserved.
    '''
    encoded = []
    for name in path.split('/'):
        if not name:
            encoded.append(name)
            continue
        stem = name.split('.')[0]
        if stem in _windows_reserved_filenames:
            # encode third letter ('aux' -> 'au~78')
            name = name[:2] + "~%02x" % ord(name[2]) + name[3:]
        last = name[-1]
        if last == '.' or last == ' ':
            # encode last period or space ('foo...' -> 'foo..~2e')
            name = name[:-1] + "~%02x" % ord(last)
        encoded.append(name)
    return '/'.join(encoded)
66 69
MAX_PATH_LEN_IN_HGSTORE = 120
DIR_PREFIX_LEN = 8
_MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4
def hybridencode(path):
    '''encodes path with a length limit

    Encodes all paths that begin with 'data/', according to the following.
    Paths that do not begin with 'data/' are returned unchanged.

    Default encoding (reversible):

    Encodes all uppercase letters 'X' as '_x'. All reserved or illegal
    characters are encoded as '~xx', where xx is the two digit hex code
    of the character (see encodefilename).
    Relevant path components consisting of Windows reserved filenames are
    masked by encoding the third character ('aux' -> 'au~78', see auxencode).
    A trailing period or space of a path component is encoded as well
    ('foo...' -> 'foo..~2e', see auxencode).

    Hashed encoding (not reversible):

    If the default-encoded path is longer than MAX_PATH_LEN_IN_HGSTORE, a
    non-reversible hybrid hashing of the path is done instead.
    This encoding uses up to DIR_PREFIX_LEN characters of all directory
    levels of the lowerencoded path, but not more levels than can fit into
    _MAX_SHORTENED_DIRS_LEN. A shortened directory name that ends in a
    period or space gets that character replaced by '_'.
    Then follows the filler followed by the sha digest of the full path.
    The filler is the beginning of the basename of the lowerencoded path
    (the basename is everything after the last path separator). The filler
    is as long as possible, filling in characters from the basename until
    the encoded path has MAX_PATH_LEN_IN_HGSTORE characters (or all chars
    of the basename have been taken).
    The extension (e.g. '.i' or '.d') is preserved.

    The string 'data/' at the beginning is replaced with 'dh/', if the hashed
    encoding was used.
    '''
    if not path.startswith('data/'):
        return path
    ndpath = path[len('data/'):]
    res = 'data/' + auxencode(encodefilename(ndpath))
    if len(res) <= MAX_PATH_LEN_IN_HGSTORE:
        return res

    # too long for the reversible form: fall back to the hashed encoding
    digest = _sha(path).hexdigest()
    aep = auxencode(lowerencode(ndpath))
    ext = os.path.splitext(aep)[1]
    parts = aep.split('/')
    basename = parts[-1]
    sdirs = []
    for p in parts[:-1]:
        d = p[:DIR_PREFIX_LEN]
        # an empty component (from '//') has no last character to look
        # at, so it is kept as is instead of raising IndexError
        if d.endswith('.') or d.endswith(' '):
            # Windows can't access dirs ending in period or space
            d = d[:-1] + '_'
        t = '/'.join(sdirs) + '/' + d
        if len(t) > _MAX_SHORTENED_DIRS_LEN:
            break
        sdirs.append(d)
    dirs = '/'.join(sdirs)
    if len(dirs) > 0:
        dirs += '/'
    res = 'dh/' + dirs + digest + ext
    space_left = MAX_PATH_LEN_IN_HGSTORE - len(res)
    if space_left > 0:
        # use whatever room is left for the start of the basename
        filler = basename[:space_left]
        res = 'dh/' + dirs + filler + digest + ext
    return res
130 133
def _calcmode(path):
    '''returns the mode to create files below path with, or None

    None is returned if path cannot be stat'ed, or if its permission bits
    are what the umask would produce anyway (no chmod needed then).
    '''
    try:
        # files in .hg/ will be created using this mode
        mode = os.stat(path).st_mode
    except OSError:
        return None
    # all nine rwx permission bits (octal 777)
    permbits = stat.S_IRWXU | stat.S_IRWXG | stat.S_IRWXO
    # avoid some useless chmods
    if (permbits & ~util._umask) == (permbits & mode):
        return None
    return mode
141 144
_data = 'data 00manifest.d 00manifest.i 00changelog.d 00changelog.i'

class basicstore:
    '''base class for local repository stores

    File names are used as they are, without any encoding.
    '''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = path
        mode = _calcmode(path)
        self.createmode = mode
        op = opener(path)
        op.createmode = mode
        self.opener = op

    def join(self, f):
        '''returns f joined onto the store path'''
        return self.pathjoiner(self.path, f)

    def _walk(self, relpath, recurse):
        '''returns the util.sort()ed list of (unencoded, encoded, size)

        Collects the regular files ending in '.d' or '.i' found in relpath
        (relative to the store path), descending into subdirectories only
        if recurse is true. Both names of an entry are the same here.
        '''
        root = self.pathjoiner(self.path, relpath)
        # length of the store path plus separator, cut off to get the name
        striplen = len(self.path) + len(os.sep)
        found = []
        pending = []
        if os.path.isdir(root):
            pending.append(root)
        while pending:
            current = pending.pop()
            for name, kind, st in osutil.listdir(current, stat=True):
                full = self.pathjoiner(current, name)
                isrevlog = name.endswith('.d') or name.endswith('.i')
                if kind == stat.S_IFREG and isrevlog:
                    rel = util.pconvert(full[striplen:])
                    found.append((rel, rel, st.st_size))
                elif kind == stat.S_IFDIR and recurse:
                    pending.append(full)
        return util.sort(found)

    def datafiles(self):
        '''returns the entries found below 'data' (see _walk)'''
        return self._walk('data', True)

    def walk(self):
        '''yields (unencoded, encoded, size)'''
        # data files come first
        for entry in self.datafiles():
            yield entry
        # then the top level files, manifest before changelog
        toplevel = self._walk('', False)
        toplevel.reverse()
        for entry in toplevel:
            yield entry

    def copylist(self):
        '''returns the list of names to copy: 'requires' plus _data'''
        names = ['requires']
        names.extend(_data.split())
        return names
190 193
class encodedstore(basicstore):
    '''store below <path>/store using encodefilename() for file names'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        op = opener(self.path)
        op.createmode = self.createmode

        def encodingopener(f, *args, **kw):
            # callers pass unencoded names, the files use encoded ones
            return op(encodefilename(f), *args, **kw)
        self.opener = encodingopener

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the files below 'data'

        unencoded is None if the name found cannot be decoded.
        '''
        for encoded, _same, size in self._walk('data', True):
            try:
                unencoded = decodefilename(encoded)
            except KeyError:
                unencoded = None
            yield unencoded, encoded, size

    def join(self, f):
        '''returns the encoded form of f joined onto the store path'''
        return self.pathjoiner(self.path, encodefilename(f))

    def copylist(self):
        '''returns the list of names to copy, store files below 'store' '''
        instore = [self.pathjoiner('store', f) for f in _data.split()]
        return ['requires', '00changelog.i'] + instore
214 217
def fncache(opener):
    '''yields the entries in the fncache file

    opener is called as opener('fncache', mode='rb'). Nothing is yielded
    if that raises IOError (nonexistent file). Each line must consist of
    at least one character followed by a newline; util.Abort is raised
    for any other line. The file is closed both when all entries have
    been yielded and before aborting on an invalid entry.
    '''
    try:
        fp = opener('fncache', mode='rb')
    except IOError:
        # skip nonexistent file
        return
    for n, line in enumerate(fp):
        if (len(line) < 2) or (line[-1] != '\n'):
            # don't leave the handle open behind the abort
            fp.close()
            t = _('invalid entry in fncache, line %s') % (n + 1)
            raise util.Abort(t)
        yield line[:-1]
    fp.close()
228 231
class fncacheopener(object):
    '''opener wrapper that records written 'data/' paths in the fncache

    Paths are passed through hybridencode() before they reach the wrapped
    opener. The unencoded form of every 'data/' path opened in a mode
    other than 'r' or 'rb' is appended to the 'fncache' file, once.
    '''
    def __init__(self, opener):
        self.opener = opener
        # paths known to be in the fncache file, None until first needed
        self.entries = None

    def loadfncache(self):
        '''(re)reads the fncache file into self.entries'''
        self.entries = {}
        for f in fncache(self.opener):
            self.entries[f] = True

    def __call__(self, path, mode='r', *args, **kw):
        '''opens the hybridencoded path with the wrapped opener'''
        if mode not in ('r', 'rb') and path.startswith('data/'):
            if self.entries is None:
                self.loadfncache()
            if path not in self.entries:
                # close explicitly, so that the new entry is on disk
                # without having to rely on garbage collection
                fp = self.opener('fncache', 'ab')
                try:
                    fp.write(path + '\n')
                finally:
                    fp.close()
                # fncache may contain non-existent files after rollback / strip
                self.entries[path] = True
        return self.opener(hybridencode(path), mode, *args, **kw)
248 251
class fncachestore(basicstore):
    '''store below <path>/store using hybridencode() and the fncache file'''
    def __init__(self, path, opener, pathjoiner):
        self.pathjoiner = pathjoiner
        self.path = self.pathjoiner(path, 'store')
        self.createmode = _calcmode(self.path)
        # plain opener, used for the fncache file itself
        rawop = opener(self.path)
        rawop.createmode = self.createmode
        self._op = rawop
        self.opener = fncacheopener(rawop)

    def join(self, f):
        '''returns the hybridencoded form of f joined onto the store path'''
        return self.pathjoiner(self.path, hybridencode(f))

    def datafiles(self):
        '''yields (unencoded, encoded, size) for the existing fncache entries

        If entries were found whose file cannot be stat'ed, the fncache
        file is rewritten without them once all entries have been yielded.
        '''
        stale = False
        alive = []
        for name in fncache(self._op):
            encoded = hybridencode(name)
            try:
                size = os.stat(self.pathjoiner(self.path, encoded)).st_size
            except OSError:
                # nonexistent entry
                stale = True
                continue
            yield name, encoded, size
            alive.append(name)
        if not stale:
            return
        # rewrite fncache to remove nonexistent entries
        # (may be caused by rollback / strip)
        fp = self._op('fncache', mode='wb')
        for name in alive:
            fp.write(name + '\n')
        fp.close()

    def copylist(self):
        '''returns the list of names to copy, store files below 'store' '''
        names = (_data + ' dh fncache').split()
        instore = [self.pathjoiner('store', f) for f in names]
        return ['requires', '00changelog.i'] + instore
287 290
def store(requirements, path, opener, pathjoiner=None):
    '''returns the store object matching the repository requirements

    'store' and 'fncache' in requirements select fncachestore, 'store'
    alone selects encodedstore, anything else basicstore. pathjoiner
    defaults to os.path.join.
    '''
    if not pathjoiner:
        pathjoiner = os.path.join
    if 'store' not in requirements:
        storetype = basicstore
    elif 'fncache' in requirements:
        storetype = fncachestore
    else:
        storetype = encodedstore
    return storetype(path, opener, pathjoiner)
@@ -1,18 +1,19
1 1 #!/usr/bin/env python
2 2
3 3 from mercurial import store
4 4
5 5 enc = store.hybridencode # used for fncache repo format
6 6
def show(s):
    '''prints s (A) and its encoded form (B), followed by an empty line'''
    print("A = '%s'" % s)
    encoded = enc(s)
    print("B = '%s'" % encoded)
    print("")
11 11
# paths to run through show(), in the order of the expected output
# NOTE(review): the expected output for the last sample has 'foo ~20', so
# runs of spaces in that sample look collapsed here - confirm against the
# original test file
for sample in (
    'data/aux.bla/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c.i',
    'data/AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH/TENTH/ELEVENTH/LOREMIPSUM.TXT.i',
    'data/enterprise/openesbaddons/contrib-imola/corba-bc/netbeansplugin/wsdlExtension/src/main/java/META-INF/services/org.netbeans.modules.xml.wsdl.bindingsupport.spi.ExtensibilityElementTemplateProvider.i',
    'data/AUX.THE-QUICK-BROWN-FOX-JU:MPS-OVER-THE-LAZY-DOG-THE-QUICK-BROWN-FOX-JUMPS-OVER-THE-LAZY-DOG.TXT.i',
    'data/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt',
    'data/Project.Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt',
    'data/foo.../foo / /a./_. /__/.x../ bla/something.i',
):
    show(sample)
@@ -1,18 +1,21
1 1 A = 'data/aux.bla/bla.aux/prn/PRN/lpt/com3/nul/coma/foo.NUL/normal.c.i'
2 2 B = 'data/au~78.bla/bla.aux/pr~6e/_p_r_n/lpt/co~6d3/nu~6c/coma/foo._n_u_l/normal.c.i'
3 3
4 4 A = 'data/AUX/SECOND/X.PRN/FOURTH/FI:FTH/SIXTH/SEVENTH/EIGHTH/NINETH/TENTH/ELEVENTH/LOREMIPSUM.TXT.i'
5 5 B = 'dh/au~78/second/x.prn/fourth/fi~3afth/sixth/seventh/eighth/nineth/tenth/loremia20419e358ddff1bf8751e38288aff1d7c32ec05.i'
6 6
7 7 A = 'data/enterprise/openesbaddons/contrib-imola/corba-bc/netbeansplugin/wsdlExtension/src/main/java/META-INF/services/org.netbeans.modules.xml.wsdl.bindingsupport.spi.ExtensibilityElementTemplateProvider.i'
8 8 B = 'dh/enterpri/openesba/contrib-/corba-bc/netbeans/wsdlexte/src/main/java/org.net7018f27961fdf338a598a40c4683429e7ffb9743.i'
9 9
10 10 A = 'data/AUX.THE-QUICK-BROWN-FOX-JU:MPS-OVER-THE-LAZY-DOG-THE-QUICK-BROWN-FOX-JUMPS-OVER-THE-LAZY-DOG.TXT.i'
11 11 B = 'dh/au~78.the-quick-brown-fox-ju~3amps-over-the-lazy-dog-the-quick-brown-fox-jud4dcadd033000ab2b26eb66bae1906bcb15d4a70.i'
12 12
13 13 A = 'data/Project Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt'
14 14 B = 'dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilenaf93030515d9849cfdca52937c2204d19f83913e5.txt'
15 15
16 16 A = 'data/Project.Planning/Resources/AnotherLongDirectoryName/Followedbyanother/AndAnother/AndThenAnExtremelyLongFileName.txt'
17 17 B = 'dh/project_/resource/anotherl/followed/andanoth/andthenanextremelylongfilena0fd7c506f5c9d58204444fc67e9499006bd2d445.txt'
18 18
19 A = 'data/foo.../foo / /a./_. /__/.x../ bla/something.i'
20 B = 'data/foo..~2e/foo ~20/~20/a~2e/__.~20/____/.x.~2e/ bla/something.i'
21
General Comments 0
You need to be logged in to leave comments. Login now