remotefilelog: adjust the signature of basepack.createindex...
marmoute - r52192:726d8584 default
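The abstract `mutablebasepack.createindex` stub declared only `nodelocations`, while `writeindex` already calls it with a second argument (`2 + len(rawfanouttable)`), so any overriding class already has to accept the offset; the change below aligns the declared signature with that call. For illustration, a minimal sketch of an overriding class under the new signature, written as if it lived in the same module (the `examplepack` name and its entry layout are hypothetical, not the real datapack/historypack code; only the relevant override is shown):

class examplepack(mutablebasepack):
    # hypothetical layout: 20-byte node followed by an 8-byte absolute offset
    INDEXENTRYLENGTH = 28

    def createindex(self, nodelocations, indexoffset):
        # nodelocations maps node -> relative position of its entry within the
        # entry table (as computed by writeindex). indexoffset is
        # 2 + fanout table size, i.e. where the data after the header and
        # fanout table begins; the entry records themselves start 8 bytes
        # later, past the entry-count field.
        rawindex = b''
        for node in sorted(nodelocations):
            rawindex += node + struct.pack(
                b'!Q', indexoffset + 8 + nodelocations[node]
            )
        return rawindex
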
@@ -1,548 +1,548 @@
import collections
import errno
import mmap
import os
import struct
import time

from mercurial.i18n import _
from mercurial.pycompat import (
    open,
)
from mercurial.node import hex
from mercurial import (
    policy,
    util,
    vfs as vfsmod,
)
from mercurial.utils import hashutil
from . import shallowutil

osutil = policy.importmod('osutil')

# The pack version supported by this implementation. This will need to be
# rev'd whenever the byte format changes. Ex: changing the fanout prefix,
# changing any of the int sizes, changing the delta algorithm, etc.
PACKVERSIONSIZE = 1
INDEXVERSIONSIZE = 2

FANOUTSTART = INDEXVERSIONSIZE

# Constant that indicates a fanout table entry hasn't been filled in. (This does
# not get serialized)
EMPTYFANOUT = -1

# The fanout prefix is the number of bytes that can be addressed by the fanout
# table. Example: a fanout prefix of 1 means we use the first byte of a hash to
# look in the fanout table (which will be 2^8 entries long).
SMALLFANOUTPREFIX = 1
LARGEFANOUTPREFIX = 2

# The number of entries in the index at which point we switch to a large fanout.
# It is chosen to balance the linear scan through a sparse fanout, with the
# size of the bisect in actual index.
# 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
# bisect) with (8 step fanout scan + 1 step bisect)
# 5 step bisect = log(2^16 / 8 / 255)  # fanout
# 8 step fanout scan = 2^16 / (2^16 / 8)  # fanout space divided by entries
SMALLFANOUTCUTOFF = 2 ** 16 // 8

# The amount of time to wait between checking for new packs. This prevents an
# exception when data is moved to a new pack after the process has already
# loaded the pack list.
REFRESHRATE = 0.1


class _cachebackedpacks:
    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in the cache.
        # Therefore, add the pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = {pack for pack in self._lrucache}
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None


class basepackstore:
    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefresh is 0 so we'll immediately check for new packs on the first
        # failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_(b'unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (an index/data file combo), yields:
        (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size  # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (
                            os.path.join(self.path, id),
                            max(mtimes[id]),
                            sizes[id],
                        )
                    else:
                        ids.add(id)
        except FileNotFoundError:
            pass

    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fully-written pack files, and not
        temporary files or other detritus in the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            b'numpacks': count,
            b'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

            # Ensures better performance of the cache by keeping the most
            # recently accessed pack at the beginning in subsequent iterations.
            if not missing:
                return missing

        if missing:
            for pack in self.refresh():
                missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time it
        has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing() on
        # new objects), let's only actually check disk for new stuff every once
        # in a while. Generally this code path should only ever matter when a
        # repack is going on in the background, and that should be pretty rare
        # to have that happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = {p.path for p in self.packs}
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks


class versionmixin:
    # Mix-in for classes with multiple supported versions
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError(b'inconsistent version: %d' % version)
        else:
            raise RuntimeError(b'unsupported version: %d' % version)


class basepack(versionmixin):
    # The maximum amount we should read via mmap before remapping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024 ** 2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory()  # initialize the mmap

        version = struct.unpack(b'!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack(b'!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

    @util.propertycache
    def _fanouttable(self):
        params = self.params
        rawfanout = self._index[FANOUTSTART : FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in range(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack(b'!I', rawfanout[loc : loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        nodecount = struct.unpack_from(
            b'!Q', self._index, self.params.indexstart - 8
        )[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, b'rb') as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(
                indexfp.fileno(), 0, access=mmap.ACCESS_READ
            )
        with open(self.packpath, b'rb') as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()


class mutablebasepack(versionmixin):
    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = b'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + b'-tmp'
        )
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + b'-tmp'
        )
        self.packfp = os.fdopen(self.packfp, 'wb+')
        self.idxfp = os.fdopen(self.idxfp, 'wb+')
        self.sha = hashutil.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files created
        # via mkstemp, so we must fix it ourselves. We can probably fix this
        # upstream in vfs.mkstemp so we don't need to use the private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack(b'!B', self.VERSION)  # unsigned 1 byte int
        self.writeraw(versionbuf)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            self.close()
        else:
            self.abort()

    def abort(self):
        # Unclean exit
        self._cleantemppacks()

    def writeraw(self, data):
        self.packfp.write(data)
        self.sha.update(data)

    def close(self, ledger=None):
        if self._closed:
            return

        try:
            sha = hex(self.sha.digest())
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result

    def _cleantemppacks(self):
        try:
            self.opener.unlink(self.packpath)
        except Exception:
            pass
        try:
            self.opener.unlink(self.idxpath)
        except Exception:
            pass

    def writeindex(self):
        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(
                params.fanoutstruct, node[: params.fanoutprefix]
            )[0]
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        rawfanouttable = b''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack(b'!I', offset)

        rawentrieslength = struct.pack(b'!Q', len(self.entries))

        # The index offset is its location in the file, i.e. after the 2 byte
        # header and the fanouttable.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()

-    def createindex(self, nodelocations):
+    def createindex(self, nodelocations, indexoffset):
        raise NotImplementedError()

    def _writeheader(self, indexparams):
        # Index header
        #    <version: 1 byte>
        #    <large fanout: 1 bit>  # 1 means 2^16, 0 means 2^8
        #    <unused: 7 bit>  # future use (compression, delta format, etc)
        config = 0
        if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
            config = 0b10000000
        self.idxfp.write(struct.pack(b'!BB', self.VERSION, config))


class indexparams:
    __slots__ = (
        'fanoutprefix',
        'fanoutstruct',
        'fanoutcount',
        'fanoutsize',
        'indexstart',
    )

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format that
        # converts the node prefix into an integer location in the fanout
        # table).
        if prefixsize == SMALLFANOUTPREFIX:
            self.fanoutstruct = b'!B'
        elif prefixsize == LARGEFANOUTPREFIX:
            self.fanoutstruct = b'!H'
        else:
            raise ValueError(b"invalid fanout prefix size: %s" % prefixsize)

        # The number of fanout table entries
        self.fanoutcount = 2 ** (prefixsize * 8)

        # The total bytes used by the fanout table
        self.fanoutsize = self.fanoutcount * 4

        self.indexstart = FANOUTSTART + self.fanoutsize
        # Skip the index length
        self.indexstart += 8
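
For reference, a quick sanity check of the index layout arithmetic encoded in indexparams; a sketch assuming a Mercurial checkout where the module is importable as hgext.remotefilelog.basepack, with the expected numbers following directly from the formulas above (fanoutcount = 2 ** (prefixsize * 8), fanoutsize = fanoutcount * 4, indexstart = 2 + fanoutsize + 8):

from hgext.remotefilelog.basepack import (
    LARGEFANOUTPREFIX,
    SMALLFANOUTPREFIX,
    indexparams,
)

# small fanout: 256 entries of 4 bytes, entries start at byte 1034
small = indexparams(SMALLFANOUTPREFIX, 2)
assert (small.fanoutcount, small.fanoutsize, small.indexstart) == (256, 1024, 1034)

# large fanout: 65536 entries of 4 bytes, entries start at byte 262154
large = indexparams(LARGEFANOUTPREFIX, 2)
assert (large.fanoutcount, large.fanoutsize, large.indexstart) == (65536, 262144, 262154)

The indexoffset that writeindex passes to createindex is 2 + fanoutsize (1026 for the small fanout, 262146 for the large one); the entry records themselves begin 8 bytes later, after the entry-count field, which is why indexstart adds a further 8.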