@@ -1,539 +1,539 @@
 from __future__ import absolute_import

 import collections
 import errno
 import hashlib
 import mmap
 import os
 import struct
 import time

 from mercurial.i18n import _
 from mercurial import (
     policy,
     pycompat,
     util,
     vfs as vfsmod,
 )
 from . import shallowutil

 osutil = policy.importmod(r'osutil')

 # The pack version supported by this implementation. This will need to be
 # rev'd whenever the byte format changes. Ex: changing the fanout prefix,
 # changing any of the int sizes, changing the delta algorithm, etc.
 PACKVERSIONSIZE = 1
 INDEXVERSIONSIZE = 2

 FANOUTSTART = INDEXVERSIONSIZE

 # Constant that indicates a fanout table entry hasn't been filled in. (This does
 # not get serialized)
 EMPTYFANOUT = -1

 # The fanout prefix is the number of bytes that can be addressed by the fanout
 # table. Example: a fanout prefix of 1 means we use the first byte of a hash to
 # look in the fanout table (which will be 2^8 entries long).
 SMALLFANOUTPREFIX = 1
 LARGEFANOUTPREFIX = 2

 # The number of entries in the index at which point we switch to a large fanout.
 # It is chosen to balance the linear scan through a sparse fanout, with the
 # size of the bisect in actual index.
 # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
 # bisect) with (8 step fanout scan + 1 step bisect)
 # 5 step bisect = log(2^16 / 8 / 255) # fanout
 # 10 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries
 SMALLFANOUTCUTOFF = 2**16 / 8

 # The amount of time to wait between checking for new packs. This prevents an
 # exception when data is moved to a new pack after the process has already
 # loaded the pack list.
 REFRESHRATE = 0.1

 if pycompat.isposix:
     # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
     # The 'e' flag will be ignored on older versions of glibc.
     PACKOPENMODE = 'rbe'
 else:
     PACKOPENMODE = 'rb'

 class _cachebackedpacks(object):
     def __init__(self, packs, cachesize):
         self._packs = set(packs)
         self._lrucache = util.lrucachedict(cachesize)
         self._lastpack = None

         # Avoid cold start of the cache by populating the most recent packs
         # in the cache.
         for i in reversed(range(min(cachesize, len(packs)))):
             self._movetofront(packs[i])

     def _movetofront(self, pack):
         # This effectively makes pack the first entry in the cache.
         self._lrucache[pack] = True

     def _registerlastpackusage(self):
         if self._lastpack is not None:
             self._movetofront(self._lastpack)
             self._lastpack = None

     def add(self, pack):
         self._registerlastpackusage()

         # This method will mostly be called when packs are not in cache.
         # Therefore, adding pack to the cache.
         self._movetofront(pack)
         self._packs.add(pack)

     def __iter__(self):
         self._registerlastpackusage()

         # Cache iteration is based on LRU.
         for pack in self._lrucache:
             self._lastpack = pack
             yield pack

         cachedpacks = set(pack for pack in self._lrucache)
         # Yield for paths not in the cache.
         for pack in self._packs - cachedpacks:
             self._lastpack = pack
             yield pack

         # Data not found in any pack.
         self._lastpack = None

 class basepackstore(object):
     # Default cache size limit for the pack files.
     DEFAULTCACHESIZE = 100

     def __init__(self, ui, path):
         self.ui = ui
         self.path = path

         # lastrefesh is 0 so we'll immediately check for new packs on the first
         # failure.
         self.lastrefresh = 0

         packs = []
         for filepath, __, __ in self._getavailablepackfilessorted():
             try:
                 pack = self.getpack(filepath)
             except Exception as ex:
                 # An exception may be thrown if the pack file is corrupted
                 # somehow. Log a warning but keep going in this case, just
                 # skipping this pack file.
                 #
                 # If this is an ENOENT error then don't even bother logging.
                 # Someone could have removed the file since we retrieved the
                 # list of paths.
                 if getattr(ex, 'errno', None) != errno.ENOENT:
                     ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                 continue
             packs.append(pack)

         self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

     def _getavailablepackfiles(self):
         """For each pack file (a index/data file combo), yields:
           (full path without extension, mtime, size)

         mtime will be the mtime of the index/data file (whichever is newer)
         size is the combined size of index/data file
         """
         indexsuffixlen = len(self.INDEXSUFFIX)
         packsuffixlen = len(self.PACKSUFFIX)

         ids = set()
         sizes = collections.defaultdict(lambda: 0)
         mtimes = collections.defaultdict(lambda: [])
         try:
             for filename, type, stat in osutil.listdir(self.path, stat=True):
                 id = None
                 if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                     id = filename[:-indexsuffixlen]
                 elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                     id = filename[:-packsuffixlen]

                 # Since we expect to have two files corresponding to each ID
                 # (the index file and the pack file), we can yield once we see
                 # it twice.
                 if id:
                     sizes[id] += stat.st_size # Sum both files' sizes together
                     mtimes[id].append(stat.st_mtime)
                     if id in ids:
                         yield (os.path.join(self.path, id), max(mtimes[id]),
                                sizes[id])
                     else:
                         ids.add(id)
         except OSError as ex:
             if ex.errno != errno.ENOENT:
                 raise

     def _getavailablepackfilessorted(self):
         """Like `_getavailablepackfiles`, but also sorts the files by mtime,
         yielding newest files first.

         This is desirable, since it is more likely newer packfiles have more
         desirable data.
         """
         files = []
         for path, mtime, size in self._getavailablepackfiles():
             files.append((mtime, size, path))
         files = sorted(files, reverse=True)
         for mtime, size, path in files:
             yield path, mtime, size

     def gettotalsizeandcount(self):
         """Returns the total disk size (in bytes) of all the pack files in
         this store, and the count of pack files.

         (This might be smaller than the total size of the ``self.path``
         directory, since this only considers fuly-writen pack files, and not
         temporary files or other detritus on the directory.)
         """
         totalsize = 0
         count = 0
         for __, __, size in self._getavailablepackfiles():
             totalsize += size
             count += 1
         return totalsize, count

     def getmetrics(self):
         """Returns metrics on the state of this store."""
         size, count = self.gettotalsizeandcount()
         return {
             'numpacks': count,
             'totalpacksize': size,
         }

     def getpack(self, path):
         raise NotImplementedError()

     def getmissing(self, keys):
         missing = keys
         for pack in self.packs:
             missing = pack.getmissing(missing)

             # Ensures better performance of the cache by keeping the most
             # recently accessed pack at the beginning in subsequent iterations.
             if not missing:
                 return missing

         if missing:
             for pack in self.refresh():
                 missing = pack.getmissing(missing)

         return missing

     def markledger(self, ledger, options=None):
         for pack in self.packs:
             pack.markledger(ledger)

     def markforrefresh(self):
         """Tells the store that there may be new pack files, so the next time it
         has a lookup miss it should check for new files."""
         self.lastrefresh = 0

     def refresh(self):
         """Checks for any new packs on disk, adds them to the main pack list,
         and returns a list of just the new packs."""
         now = time.time()

         # If we experience a lot of misses (like in the case of getmissing() on
         # new objects), let's only actually check disk for new stuff every once
         # in a while. Generally this code path should only ever matter when a
         # repack is going on in the background, and that should be pretty rare
         # to have that happen twice in quick succession.
         newpacks = []
         if now > self.lastrefresh + REFRESHRATE:
             self.lastrefresh = now
             previous = set(p.path for p in self.packs)
             for filepath, __, __ in self._getavailablepackfilessorted():
                 if filepath not in previous:
                     newpack = self.getpack(filepath)
                     newpacks.append(newpack)
                     self.packs.add(newpack)

         return newpacks

 class versionmixin(object):
     # Mix-in for classes with multiple supported versions
     VERSION = None
     SUPPORTED_VERSIONS = [2]

     def _checkversion(self, version):
         if version in self.SUPPORTED_VERSIONS:
             if self.VERSION is None:
                 # only affect this instance
                 self.VERSION = version
             elif self.VERSION != version:
                 raise RuntimeError('inconsistent version: %s' % version)
         else:
             raise RuntimeError('unsupported version: %s' % version)

 class basepack(versionmixin):
     # The maximum amount we should read via mmap before remmaping so the old
     # pages can be released (100MB)
     MAXPAGEDIN = 100 * 1024**2

     SUPPORTED_VERSIONS = [2]

     def __init__(self, path):
         self.path = path
         self.packpath = path + self.PACKSUFFIX
         self.indexpath = path + self.INDEXSUFFIX

         self.indexsize = os.stat(self.indexpath).st_size
         self.datasize = os.stat(self.packpath).st_size

         self._index = None
         self._data = None
         self.freememory() # initialize the mmap

         version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
         self._checkversion(version)

         version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
         self._checkversion(version)

         if 0b10000000 & config:
             self.params = indexparams(LARGEFANOUTPREFIX, version)
         else:
             self.params = indexparams(SMALLFANOUTPREFIX, version)

     @util.propertycache
     def _fanouttable(self):
         params = self.params
         rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
         fanouttable = []
         for i in pycompat.xrange(0, params.fanoutcount):
             loc = i * 4
             fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
             fanouttable.append(fanoutentry)
         return fanouttable

     @util.propertycache
     def _indexend(self):
         nodecount = struct.unpack_from('!Q', self._index,
                                        self.params.indexstart - 8)[0]
         return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

     def freememory(self):
         """Unmap and remap the memory to free it up after known expensive
         operations. Return True if self._data and self._index were reloaded.
         """
         if self._index:
             if self._pagedin < self.MAXPAGEDIN:
                 return False

             self._index.close()
             self._data.close()

         # TODO: use an opener/vfs to access these paths
         with open(self.indexpath, PACKOPENMODE) as indexfp:
             # memory-map the file, size 0 means whole file
             self._index = mmap.mmap(indexfp.fileno(), 0,
                                     access=mmap.ACCESS_READ)
         with open(self.packpath, PACKOPENMODE) as datafp:
             self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

         self._pagedin = 0
         return True

     def getmissing(self, keys):
         raise NotImplementedError()

     def markledger(self, ledger, options=None):
         raise NotImplementedError()

     def cleanup(self, ledger):
         raise NotImplementedError()

     def __iter__(self):
         raise NotImplementedError()

     def iterentries(self):
         raise NotImplementedError()

 class mutablebasepack(versionmixin):

     def __init__(self, ui, packdir, version=2):
         self._checkversion(version)
         # TODO(augie): make this configurable
         self._compressor = 'GZ'
         opener = vfsmod.vfs(packdir)
         opener.createmode = 0o444
         self.opener = opener

         self.entries = {}

         shallowutil.mkstickygroupdir(ui, packdir)
         self.packfp, self.packpath = opener.mkstemp(
             suffix=self.PACKSUFFIX + '-tmp')
         self.idxfp, self.idxpath = opener.mkstemp(
             suffix=self.INDEXSUFFIX + '-tmp')
-        self.packfp = os.fdopen(self.packfp, 'w+')
-        self.idxfp = os.fdopen(self.idxfp, 'w+')
+        self.packfp = os.fdopen(self.packfp, r'w+')
+        self.idxfp = os.fdopen(self.idxfp, r'w+')
         self.sha = hashlib.sha1()
         self._closed = False

         # The opener provides no way of doing permission fixup on files created
         # via mkstemp, so we must fix it ourselves. We can probably fix this
         # upstream in vfs.mkstemp so we don't need to use the private method.
         opener._fixfilemode(opener.join(self.packpath))
         opener._fixfilemode(opener.join(self.idxpath))

         # Write header
         # TODO: make it extensible (ex: allow specifying compression algorithm,
         # a flexible key/value header, delta algorithm, fanout size, etc)
         versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
         self.writeraw(versionbuf)

     def __enter__(self):
         return self

     def __exit__(self, exc_type, exc_value, traceback):
         if exc_type is None:
             self.close()
         else:
             self.abort()

     def abort(self):
         # Unclean exit
         self._cleantemppacks()

     def writeraw(self, data):
         self.packfp.write(data)
         self.sha.update(data)

     def close(self, ledger=None):
         if self._closed:
             return

         try:
             sha = self.sha.hexdigest()
             self.packfp.close()
             self.writeindex()

             if len(self.entries) == 0:
                 # Empty pack
                 self._cleantemppacks()
                 self._closed = True
                 return None

             self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
             try:
                 self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
             except Exception as ex:
                 try:
                     self.opener.unlink(sha + self.PACKSUFFIX)
                 except Exception:
                     pass
                 # Throw exception 'ex' explicitly since a normal 'raise' would
                 # potentially throw an exception from the unlink cleanup.
                 raise ex
         except Exception:
             # Clean up temp packs in all exception cases
             self._cleantemppacks()
             raise

         self._closed = True
         result = self.opener.join(sha)
         if ledger:
             ledger.addcreated(result)
         return result

     def _cleantemppacks(self):
         try:
             self.opener.unlink(self.packpath)
         except Exception:
             pass
         try:
             self.opener.unlink(self.idxpath)
         except Exception:
             pass

     def writeindex(self):
         rawindex = ''

         largefanout = len(self.entries) > SMALLFANOUTCUTOFF
         if largefanout:
             params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
         else:
             params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

         fanouttable = [EMPTYFANOUT] * params.fanoutcount

         # Precompute the location of each entry
         locations = {}
         count = 0
         for node in sorted(self.entries.iterkeys()):
             location = count * self.INDEXENTRYLENGTH
             locations[node] = location
             count += 1

             # Must use [0] on the unpack result since it's always a tuple.
             fanoutkey = struct.unpack(params.fanoutstruct,
                                       node[:params.fanoutprefix])[0]
             if fanouttable[fanoutkey] == EMPTYFANOUT:
                 fanouttable[fanoutkey] = location

         rawfanouttable = ''
         last = 0
         for offset in fanouttable:
             offset = offset if offset != EMPTYFANOUT else last
             last = offset
             rawfanouttable += struct.pack('!I', offset)

         rawentrieslength = struct.pack('!Q', len(self.entries))

         # The index offset is the it's location in the file. So after the 2 byte
         # header and the fanouttable.
         rawindex = self.createindex(locations, 2 + len(rawfanouttable))

         self._writeheader(params)
         self.idxfp.write(rawfanouttable)
         self.idxfp.write(rawentrieslength)
         self.idxfp.write(rawindex)
         self.idxfp.close()

     def createindex(self, nodelocations):
         raise NotImplementedError()

     def _writeheader(self, indexparams):
         # Index header
         #    <version: 1 byte>
         #    <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8
         #    <unused: 7 bit> # future use (compression, delta format, etc)
         config = 0
         if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
             config = 0b10000000
         self.idxfp.write(struct.pack('!BB', self.VERSION, config))

 class indexparams(object):
     __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
                  r'fanoutsize', r'indexstart')

     def __init__(self, prefixsize, version):
         self.fanoutprefix = prefixsize

         # The struct pack format for fanout table location (i.e. the format that
         # converts the node prefix into an integer location in the fanout
         # table).
         if prefixsize == SMALLFANOUTPREFIX:
             self.fanoutstruct = '!B'
         elif prefixsize == LARGEFANOUTPREFIX:
             self.fanoutstruct = '!H'
         else:
             raise ValueError("invalid fanout prefix size: %s" % prefixsize)

         # The number of fanout table entries
         self.fanoutcount = 2**(prefixsize * 8)

         # The total bytes used by the fanout table
         self.fanoutsize = self.fanoutcount * 4

         self.indexstart = FANOUTSTART + self.fanoutsize
         # Skip the index length
         self.indexstart += 8
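
As a reading aid, below is a minimal sketch of the index-layout arithmetic that the constants in this file define (fanout entry count, fanout table size, index start offset, and the bisect-depth estimate behind SMALLFANOUTCUTOFF). It is illustrative only and not part of the change under review; the printed values simply restate the file's own comments.

# Illustrative sketch only: mirrors the constants in basepack.py, not code
# from the patch itself.
import math

INDEXVERSIONSIZE = 2        # version byte + config byte
FANOUTSTART = INDEXVERSIONSIZE

for prefixsize in (1, 2):   # SMALLFANOUTPREFIX, LARGEFANOUTPREFIX
    fanoutcount = 2 ** (prefixsize * 8)         # entries in the fanout table
    fanoutsize = fanoutcount * 4                # 4 bytes per '!I' entry
    indexstart = FANOUTSTART + fanoutsize + 8   # plus 8-byte entry count ('!Q')
    print(prefixsize, fanoutcount, fanoutsize, indexstart)

# SMALLFANOUTCUTOFF = 2**16 / 8 = 8192 entries: spread over the 255 usable
# small-fanout buckets, that is roughly 32 entries per bucket, i.e. about
# log2(8192 / 255) ~= 5 bisect steps after the one-step fanout jump.
print(math.log2(2 ** 16 / 8 / 255))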