Show More
@@ -70,9 +70,9 b' def _buildencodefun():' | |||||
70 | 'the\\x07quick\\xadshot' |
|
70 | 'the\\x07quick\\xadshot' | |
71 | ''' |
|
71 | ''' | |
72 | e = '_' |
|
72 | e = '_' | |
73 |
win |
|
73 | winreserved = [ord(x) for x in '\\:*?"<>|'] | |
74 | cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) |
|
74 | cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) | |
75 |
for x in (range(32) + range(126, 256) + win |
|
75 | for x in (range(32) + range(126, 256) + winreserved): | |
76 | cmap[chr(x)] = "~%02x" % x |
|
76 | cmap[chr(x)] = "~%02x" % x | |
77 | for x in range(ord("A"), ord("Z")+1) + [ord(e)]: |
|
77 | for x in range(ord("A"), ord("Z")+1) + [ord(e)]: | |
78 | cmap[chr(x)] = e + chr(x).lower() |
|
78 | cmap[chr(x)] = e + chr(x).lower() | |
@@ -96,9 +96,9 b' def _buildencodefun():' | |||||
96 |
|
96 | |||
97 | encodefilename, decodefilename = _buildencodefun() |
|
97 | encodefilename, decodefilename = _buildencodefun() | |
98 |
|
98 | |||
99 |
def _build |
|
99 | def _buildlowerencodefun(): | |
100 | ''' |
|
100 | ''' | |
101 |
>>> f = _build |
|
101 | >>> f = _buildlowerencodefun() | |
102 | >>> f('nothing/special.txt') |
|
102 | >>> f('nothing/special.txt') | |
103 | 'nothing/special.txt' |
|
103 | 'nothing/special.txt' | |
104 | >>> f('HELLO') |
|
104 | >>> f('HELLO') | |
@@ -108,17 +108,17 b' def _build_lower_encodefun():' | |||||
108 | >>> f('the\x07quick\xADshot') |
|
108 | >>> f('the\x07quick\xADshot') | |
109 | 'the~07quick~adshot' |
|
109 | 'the~07quick~adshot' | |
110 | ''' |
|
110 | ''' | |
111 |
win |
|
111 | winreserved = [ord(x) for x in '\\:*?"<>|'] | |
112 | cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) |
|
112 | cmap = dict([(chr(x), chr(x)) for x in xrange(127)]) | |
113 |
for x in (range(32) + range(126, 256) + win |
|
113 | for x in (range(32) + range(126, 256) + winreserved): | |
114 | cmap[chr(x)] = "~%02x" % x |
|
114 | cmap[chr(x)] = "~%02x" % x | |
115 | for x in range(ord("A"), ord("Z")+1): |
|
115 | for x in range(ord("A"), ord("Z")+1): | |
116 | cmap[chr(x)] = chr(x).lower() |
|
116 | cmap[chr(x)] = chr(x).lower() | |
117 | return lambda s: "".join([cmap[c] for c in s]) |
|
117 | return lambda s: "".join([cmap[c] for c in s]) | |
118 |
|
118 | |||
119 |
lowerencode = _build |
|
119 | lowerencode = _buildlowerencodefun() | |
120 |
|
120 | |||
121 |
_win |
|
121 | _winreservednames = '''con prn aux nul | |
122 | com1 com2 com3 com4 com5 com6 com7 com8 com9 |
|
122 | com1 com2 com3 com4 com5 com6 com7 com8 com9 | |
123 | lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split() |
|
123 | lpt1 lpt2 lpt3 lpt4 lpt5 lpt6 lpt7 lpt8 lpt9'''.split() | |
124 | def _auxencode(path, dotencode): |
|
124 | def _auxencode(path, dotencode): | |
@@ -143,7 +143,7 b' def _auxencode(path, dotencode):' | |||||
143 | for n in path.split('/'): |
|
143 | for n in path.split('/'): | |
144 | if n: |
|
144 | if n: | |
145 | base = n.split('.')[0] |
|
145 | base = n.split('.')[0] | |
146 |
if base and (base in _win |
|
146 | if base and (base in _winreservednames): | |
147 | # encode third letter ('aux' -> 'au~78') |
|
147 | # encode third letter ('aux' -> 'au~78') | |
148 | ec = "~%02x" % ord(n[2]) |
|
148 | ec = "~%02x" % ord(n[2]) | |
149 | n = n[0:2] + ec + n[3:] |
|
149 | n = n[0:2] + ec + n[3:] | |
@@ -155,9 +155,9 b' def _auxencode(path, dotencode):' | |||||
155 | res.append(n) |
|
155 | res.append(n) | |
156 | return '/'.join(res) |
|
156 | return '/'.join(res) | |
157 |
|
157 | |||
158 | MAX_PATH_LEN_IN_HGSTORE = 120 |
|
158 | _maxstorepathlen = 120 | |
159 | DIR_PREFIX_LEN = 8 |
|
159 | _dirprefixlen = 8 | |
160 | _MAX_SHORTENED_DIRS_LEN = 8 * (DIR_PREFIX_LEN + 1) - 4 |
|
160 | _maxshortdirslen = 8 * (_dirprefixlen + 1) - 4 | |
161 | def _hybridencode(path, auxencode): |
|
161 | def _hybridencode(path, auxencode): | |
162 | '''encodes path with a length limit |
|
162 | '''encodes path with a length limit | |
163 |
|
163 | |||
@@ -173,17 +173,17 b' def _hybridencode(path, auxencode):' | |||||
173 |
|
173 | |||
174 | Hashed encoding (not reversible): |
|
174 | Hashed encoding (not reversible): | |
175 |
|
175 | |||
176 |
If the default-encoded path is longer than |
|
176 | If the default-encoded path is longer than _maxstorepathlen, a | |
177 | non-reversible hybrid hashing of the path is done instead. |
|
177 | non-reversible hybrid hashing of the path is done instead. | |
178 |
This encoding uses up to |
|
178 | This encoding uses up to _dirprefixlen characters of all directory | |
179 | levels of the lowerencoded path, but not more levels than can fit into |
|
179 | levels of the lowerencoded path, but not more levels than can fit into | |
180 | _MAX_SHORTENED_DIRS_LEN. |
|
180 | _maxshortdirslen. | |
181 | Then follows the filler followed by the sha digest of the full path. |
|
181 | Then follows the filler followed by the sha digest of the full path. | |
182 | The filler is the beginning of the basename of the lowerencoded path |
|
182 | The filler is the beginning of the basename of the lowerencoded path | |
183 | (the basename is everything after the last path separator). The filler |
|
183 | (the basename is everything after the last path separator). The filler | |
184 | is as long as possible, filling in characters from the basename until |
|
184 | is as long as possible, filling in characters from the basename until | |
185 |
the encoded path has |
|
185 | the encoded path has _maxstorepathlen characters (or all chars of the | |
186 |
|
|
186 | basename have been taken). | |
187 | The extension (e.g. '.i' or '.d') is preserved. |
|
187 | The extension (e.g. '.i' or '.d') is preserved. | |
188 |
|
188 | |||
189 | The string 'data/' at the beginning is replaced with 'dh/', if the hashed |
|
189 | The string 'data/' at the beginning is replaced with 'dh/', if the hashed | |
@@ -195,7 +195,7 b' def _hybridencode(path, auxencode):' | |||||
195 | path = encodedir(path) |
|
195 | path = encodedir(path) | |
196 | ndpath = path[len('data/'):] |
|
196 | ndpath = path[len('data/'):] | |
197 | res = 'data/' + auxencode(encodefilename(ndpath)) |
|
197 | res = 'data/' + auxencode(encodefilename(ndpath)) | |
198 | if len(res) > MAX_PATH_LEN_IN_HGSTORE: |
|
198 | if len(res) > _maxstorepathlen: | |
199 | digest = _sha(path).hexdigest() |
|
199 | digest = _sha(path).hexdigest() | |
200 | aep = auxencode(lowerencode(ndpath)) |
|
200 | aep = auxencode(lowerencode(ndpath)) | |
201 | _root, ext = os.path.splitext(aep) |
|
201 | _root, ext = os.path.splitext(aep) | |
@@ -203,21 +203,21 b' def _hybridencode(path, auxencode):' | |||||
203 | basename = parts[-1] |
|
203 | basename = parts[-1] | |
204 | sdirs = [] |
|
204 | sdirs = [] | |
205 | for p in parts[:-1]: |
|
205 | for p in parts[:-1]: | |
206 |
d = p[: |
|
206 | d = p[:_dirprefixlen] | |
207 | if d[-1] in '. ': |
|
207 | if d[-1] in '. ': | |
208 | # Windows can't access dirs ending in period or space |
|
208 | # Windows can't access dirs ending in period or space | |
209 | d = d[:-1] + '_' |
|
209 | d = d[:-1] + '_' | |
210 | t = '/'.join(sdirs) + '/' + d |
|
210 | t = '/'.join(sdirs) + '/' + d | |
211 | if len(t) > _MAX_SHORTENED_DIRS_LEN: |
|
211 | if len(t) > _maxshortdirslen: | |
212 | break |
|
212 | break | |
213 | sdirs.append(d) |
|
213 | sdirs.append(d) | |
214 | dirs = '/'.join(sdirs) |
|
214 | dirs = '/'.join(sdirs) | |
215 | if len(dirs) > 0: |
|
215 | if len(dirs) > 0: | |
216 | dirs += '/' |
|
216 | dirs += '/' | |
217 | res = 'dh/' + dirs + digest + ext |
|
217 | res = 'dh/' + dirs + digest + ext | |
218 |
space |
|
218 | spaceleft = _maxstorepathlen - len(res) | |
219 |
if space |
|
219 | if spaceleft > 0: | |
220 |
filler = basename[:space |
|
220 | filler = basename[:spaceleft] | |
221 | res = 'dh/' + dirs + filler + digest + ext |
|
221 | res = 'dh/' + dirs + filler + digest + ext | |
222 | return res |
|
222 | return res | |
223 |
|
223 |
General Comments 0
You need to be logged in to leave comments.
Login now