Show More
@@ -1,540 +1,541 b'' | |||||
1 | from __future__ import absolute_import |
|
1 | from __future__ import absolute_import | |
2 |
|
2 | |||
3 | import collections |
|
3 | import collections | |
4 | import errno |
|
4 | import errno | |
5 | import hashlib |
|
5 | import hashlib | |
6 | import mmap |
|
6 | import mmap | |
7 | import os |
|
7 | import os | |
8 | import struct |
|
8 | import struct | |
9 | import time |
|
9 | import time | |
10 |
|
10 | |||
11 | from mercurial.i18n import _ |
|
11 | from mercurial.i18n import _ | |
12 | from mercurial import ( |
|
12 | from mercurial import ( | |
13 | node as nodemod, |
|
13 | node as nodemod, | |
14 | policy, |
|
14 | policy, | |
15 | pycompat, |
|
15 | pycompat, | |
16 | util, |
|
16 | util, | |
17 | vfs as vfsmod, |
|
17 | vfs as vfsmod, | |
18 | ) |
|
18 | ) | |
19 | from . import shallowutil |
|
19 | from . import shallowutil | |
20 |
|
20 | |||
21 | osutil = policy.importmod(r'osutil') |
|
21 | osutil = policy.importmod(r'osutil') | |
22 |
|
22 | |||
23 | # The pack version supported by this implementation. This will need to be |
|
23 | # The pack version supported by this implementation. This will need to be | |
24 | # rev'd whenever the byte format changes. Ex: changing the fanout prefix, |
|
24 | # rev'd whenever the byte format changes. Ex: changing the fanout prefix, | |
25 | # changing any of the int sizes, changing the delta algorithm, etc. |
|
25 | # changing any of the int sizes, changing the delta algorithm, etc. | |
26 | PACKVERSIONSIZE = 1 |
|
26 | PACKVERSIONSIZE = 1 | |
27 | INDEXVERSIONSIZE = 2 |
|
27 | INDEXVERSIONSIZE = 2 | |
28 |
|
28 | |||
29 | FANOUTSTART = INDEXVERSIONSIZE |
|
29 | FANOUTSTART = INDEXVERSIONSIZE | |
30 |
|
30 | |||
31 | # Constant that indicates a fanout table entry hasn't been filled in. (This does |
|
31 | # Constant that indicates a fanout table entry hasn't been filled in. (This does | |
32 | # not get serialized) |
|
32 | # not get serialized) | |
33 | EMPTYFANOUT = -1 |
|
33 | EMPTYFANOUT = -1 | |
34 |
|
34 | |||
35 | # The fanout prefix is the number of bytes that can be addressed by the fanout |
|
35 | # The fanout prefix is the number of bytes that can be addressed by the fanout | |
36 | # table. Example: a fanout prefix of 1 means we use the first byte of a hash to |
|
36 | # table. Example: a fanout prefix of 1 means we use the first byte of a hash to | |
37 | # look in the fanout table (which will be 2^8 entries long). |
|
37 | # look in the fanout table (which will be 2^8 entries long). | |
38 | SMALLFANOUTPREFIX = 1 |
|
38 | SMALLFANOUTPREFIX = 1 | |
39 | LARGEFANOUTPREFIX = 2 |
|
39 | LARGEFANOUTPREFIX = 2 | |
40 |
|
40 | |||
41 | # The number of entries in the index at which point we switch to a large fanout. |
|
41 | # The number of entries in the index at which point we switch to a large fanout. | |
42 | # It is chosen to balance the linear scan through a sparse fanout, with the |
|
42 | # It is chosen to balance the linear scan through a sparse fanout, with the | |
43 | # size of the bisect in actual index. |
|
43 | # size of the bisect in actual index. | |
44 | # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step |
|
44 | # 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step | |
45 | # bisect) with (8 step fanout scan + 1 step bisect) |
|
45 | # bisect) with (8 step fanout scan + 1 step bisect) | |
46 | # 5 step bisect = log(2^16 / 8 / 255) # fanout |
|
46 | # 5 step bisect = log(2^16 / 8 / 255) # fanout | |
47 | # 10 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries |
|
47 | # 10 step fanout scan = 2^16 / (2^16 / 8) # fanout space divided by entries | |
48 | SMALLFANOUTCUTOFF = 2**16 // 8 |
|
48 | SMALLFANOUTCUTOFF = 2**16 // 8 | |
49 |
|
49 | |||
50 | # The amount of time to wait between checking for new packs. This prevents an |
|
50 | # The amount of time to wait between checking for new packs. This prevents an | |
51 | # exception when data is moved to a new pack after the process has already |
|
51 | # exception when data is moved to a new pack after the process has already | |
52 | # loaded the pack list. |
|
52 | # loaded the pack list. | |
53 | REFRESHRATE = 0.1 |
|
53 | REFRESHRATE = 0.1 | |
54 |
|
54 | |||
if pycompat.isposix and not pycompat.ispy3:
    # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
    # The 'e' flag will be ignored on older versions of glibc.
    # Python 3 can't handle the 'e' flag.
    PACKOPENMODE = 'rbe'
else:
    PACKOPENMODE = 'rb'
61 |
|
62 | |||
class _cachebackedpacks(object):
    """An iterable collection of packs fronted by an LRU cache.

    Iteration yields the most-recently-useful packs first: packs currently
    in the LRU cache (most recent first), then any remaining packs. The pack
    that last produced a hit is promoted on the next cache interaction.
    """

    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        # Promote the pack that satisfied the previous lookup, if any.
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in cache.
        # Therefore, adding pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = set(pack for pack in self._lrucache)
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None
106 |
|
107 | |||
class basepackstore(object):
    """Base class for a store backed by a directory of pack files.

    Subclasses must define INDEXSUFFIX/PACKSUFFIX and implement getpack().
    """

    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefesh is 0 so we'll immediately check for new packs on the first
        # failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (a index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, __, stat in osutil.listdir(self.path, stat=True):
                fileid = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    fileid = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    fileid = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if fileid:
                    sizes[fileid] += stat.st_size # Sum both files' sizes together
                    mtimes[fileid].append(stat.st_mtime)
                    if fileid in ids:
                        yield (os.path.join(self.path, fileid),
                               max(mtimes[fileid]), sizes[fileid])
                    else:
                        ids.add(fileid)
        except OSError as ex:
            # A missing pack directory simply means no packs are available.
            if ex.errno != errno.ENOENT:
                raise

    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fuly-writen pack files, and not
        temporary files or other detritus on the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            'numpacks': count,
            'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

        # Ensures better performance of the cache by keeping the most
        # recently accessed pack at the beginning in subsequent iterations.
        if not missing:
            return missing

        # Some keys are still unaccounted for: check for newly arrived packs.
        for pack in self.refresh():
            missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time it
        has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing() on
        # new objects), let's only actually check disk for new stuff every once
        # in a while. Generally this code path should only ever matter when a
        # repack is going on in the background, and that should be pretty rare
        # to have that happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = set(p.path for p in self.packs)
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks
260 |
|
261 | |||
class versionmixin(object):
    """Mix-in for classes with multiple supported on-disk versions.

    The first accepted version pins VERSION for this instance; any later
    mismatching version is rejected.
    """
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        """Validate `version` against SUPPORTED_VERSIONS and pin it.

        Raises RuntimeError when the version is unsupported, or when it
        conflicts with a version already pinned on this instance.
        """
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError('inconsistent version: %s' % version)
        else:
            raise RuntimeError('unsupported version: %s' % version)
275 |
|
276 | |||
class basepack(versionmixin):
    """Base class for a read-only, mmap-backed pack (index + data file pair).

    Subclasses must define INDEXSUFFIX/PACKSUFFIX/INDEXENTRYLENGTH and the
    abstract iteration/lookup methods below.
    """

    # The maximum amount we should read via mmap before remmaping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024**2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory() # initialize the mmap

        version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        # The high bit of the config byte selects the large fanout layout.
        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

    @util.propertycache
    def _fanouttable(self):
        # Decode the on-disk fanout table (one big-endian uint32 per entry).
        params = self.params
        rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in pycompat.xrange(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        # The 8 bytes before the index entries hold the entry count.
        nodecount = struct.unpack_from('!Q', self._index,
                                       self.params.indexstart - 8)[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, PACKOPENMODE) as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(indexfp.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        with open(self.packpath, PACKOPENMODE) as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()
359 |
|
360 | |||
class mutablebasepack(versionmixin):
    """Base class for building a new pack (index + data) via temp files.

    Data is accumulated through writeraw(); close() finalizes the pack under
    a name derived from the sha1 of its contents, while abort() discards it.
    """

    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = 'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + '-tmp')
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + '-tmp')
        self.packfp = os.fdopen(self.packfp, r'wb+')
        self.idxfp = os.fdopen(self.idxfp, r'wb+')
        self.sha = hashlib.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files created
        # via mkstemp, so we must fix it ourselves. We can probably fix this
        # upstream in vfs.mkstemp so we don't need to use the private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
        self.writeraw(versionbuf)
393 |
|
394 | |||
394 | def __enter__(self): |
|
395 | def __enter__(self): | |
395 | return self |
|
396 | return self | |
396 |
|
397 | |||
397 | def __exit__(self, exc_type, exc_value, traceback): |
|
398 | def __exit__(self, exc_type, exc_value, traceback): | |
398 | if exc_type is None: |
|
399 | if exc_type is None: | |
399 | self.close() |
|
400 | self.close() | |
400 | else: |
|
401 | else: | |
401 | self.abort() |
|
402 | self.abort() | |
402 |
|
403 | |||
403 | def abort(self): |
|
404 | def abort(self): | |
404 | # Unclean exit |
|
405 | # Unclean exit | |
405 | self._cleantemppacks() |
|
406 | self._cleantemppacks() | |
406 |
|
407 | |||
407 | def writeraw(self, data): |
|
408 | def writeraw(self, data): | |
408 | self.packfp.write(data) |
|
409 | self.packfp.write(data) | |
409 | self.sha.update(data) |
|
410 | self.sha.update(data) | |
410 |
|
411 | |||
    def close(self, ledger=None):
        """Finish the pack: write the index and rename the temporary
        pack/index files to their content-addressed (sha1-of-pack) names.

        ``ledger``: optional; when provided, the finished pack path is
        recorded via ``ledger.addcreated``.

        Returns the opener-relative path of the new pack (no suffix), or
        None for an empty pack.  Calling close() again after it has
        completed is a no-op.
        """
        if self._closed:
            return

        try:
            # The pack is named after the sha1 of everything written to it.
            sha = nodemod.hex(self.sha.digest())
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack: nothing worth keeping, drop the temp files.
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                # The index rename failed; remove the already-renamed pack
                # so we never leave a pack without its index.
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result
447 |
|
448 | |||
448 | def _cleantemppacks(self): |
|
449 | def _cleantemppacks(self): | |
449 | try: |
|
450 | try: | |
450 | self.opener.unlink(self.packpath) |
|
451 | self.opener.unlink(self.packpath) | |
451 | except Exception: |
|
452 | except Exception: | |
452 | pass |
|
453 | pass | |
453 | try: |
|
454 | try: | |
454 | self.opener.unlink(self.idxpath) |
|
455 | self.opener.unlink(self.idxpath) | |
455 | except Exception: |
|
456 | except Exception: | |
456 | pass |
|
457 | pass | |
457 |
|
458 | |||
    def writeindex(self):
        """Build and write the index file for the pack being closed.

        Index layout (integers are big-endian):
          <2 byte header>        # version + config, via _writeheader
          <fanout table>         # 4-byte entry offsets, 2^8 or 2^16 slots
          <8 byte entry count>
          <index entries>        # serialized by createindex
        """
        rawindex = ''

        # Pick the fanout size based on how many entries this pack holds.
        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(params.fanoutstruct,
                                      node[:params.fanoutprefix])[0]
            # Record only the first (smallest) entry offset per fanout slot.
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        # Serialize the fanout table; unfilled slots inherit the previous
        # offset so lookups can binary-search a contiguous range.
        rawfanouttable = ''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack('!I', offset)

        rawentrieslength = struct.pack('!Q', len(self.entries))

        # The index entries start after the 2 byte header and the fanout
        # table; pass that offset through to createindex.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()
501 |
|
502 | |||
502 | def createindex(self, nodelocations): |
|
503 | def createindex(self, nodelocations): | |
503 | raise NotImplementedError() |
|
504 | raise NotImplementedError() | |
504 |
|
505 | |||
505 | def _writeheader(self, indexparams): |
|
506 | def _writeheader(self, indexparams): | |
506 | # Index header |
|
507 | # Index header | |
507 | # <version: 1 byte> |
|
508 | # <version: 1 byte> | |
508 | # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8 |
|
509 | # <large fanout: 1 bit> # 1 means 2^16, 0 means 2^8 | |
509 | # <unused: 7 bit> # future use (compression, delta format, etc) |
|
510 | # <unused: 7 bit> # future use (compression, delta format, etc) | |
510 | config = 0 |
|
511 | config = 0 | |
511 | if indexparams.fanoutprefix == LARGEFANOUTPREFIX: |
|
512 | if indexparams.fanoutprefix == LARGEFANOUTPREFIX: | |
512 | config = 0b10000000 |
|
513 | config = 0b10000000 | |
513 | self.idxfp.write(struct.pack('!BB', self.VERSION, config)) |
|
514 | self.idxfp.write(struct.pack('!BB', self.VERSION, config)) | |
514 |
|
515 | |||
class indexparams(object):
    """Derived layout parameters for a pack index file.

    Given the fanout prefix size (in bytes), precomputes the struct
    format used to map a node prefix to a fanout slot, the slot count,
    the fanout table's byte size, and the byte offset at which the index
    entries start.
    """
    __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
                 r'fanoutsize', r'indexstart')

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format
        # that converts the node prefix into an integer location in the
        # fanout table).
        formats = {
            SMALLFANOUTPREFIX: '!B',
            LARGEFANOUTPREFIX: '!H',
        }
        if prefixsize not in formats:
            raise ValueError("invalid fanout prefix size: %s" % prefixsize)
        self.fanoutstruct = formats[prefixsize]

        # One fanout slot per possible prefix value.
        self.fanoutcount = 1 << (prefixsize * 8)

        # Each slot holds a 4-byte offset, so this is the table's total size.
        self.fanoutsize = self.fanoutcount * 4

        # Entries begin after the header, the fanout table, and the
        # 8-byte index length field.
        self.indexstart = FANOUTSTART + self.fanoutsize + 8
General Comments 0
You need to be logged in to leave comments.
Login now