basepack: avoid 'rbe' mode in Python 3...
Augie Fackler
r41285:c891a11f default
@@ -1,540 +1,541 @@
from __future__ import absolute_import

import collections
import errno
import hashlib
import mmap
import os
import struct
import time

from mercurial.i18n import _
from mercurial import (
    node as nodemod,
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from . import shallowutil

osutil = policy.importmod(r'osutil')

# The pack version supported by this implementation. This will need to be
# rev'd whenever the byte format changes. Ex: changing the fanout prefix,
# changing any of the int sizes, changing the delta algorithm, etc.
PACKVERSIONSIZE = 1
INDEXVERSIONSIZE = 2

FANOUTSTART = INDEXVERSIONSIZE

# Constant that indicates a fanout table entry hasn't been filled in. (This
# does not get serialized)
EMPTYFANOUT = -1

# The fanout prefix is the number of bytes that can be addressed by the fanout
# table. Example: a fanout prefix of 1 means we use the first byte of a hash to
# look in the fanout table (which will be 2^8 entries long).
SMALLFANOUTPREFIX = 1
LARGEFANOUTPREFIX = 2

# The number of entries in the index at which point we switch to a large
# fanout. It is chosen to balance the linear scan through a sparse fanout,
# with the size of the bisect in the actual index.
# 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
# bisect) with (8 step fanout scan + 1 step bisect)
# 5 step bisect = log(2^16 / 8 / 255)  # fanout
# 8 step fanout scan = 2^16 / (2^16 / 8)  # fanout space divided by entries
SMALLFANOUTCUTOFF = 2**16 // 8

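As a sanity check on the trade-off arithmetic above, a standalone sketch (not part of the file; the 255 divisor follows the comment's own formula):

import math

entries = 2**16 // 8                       # SMALLFANOUTCUTOFF
fanoutspace = 2**16                        # slots in a 2-byte-prefix fanout

bisectsteps = math.log2(entries / 255)     # ~5: bisect within one fanout bucket
scansteps = fanoutspace / entries          # 8: fanout space divided by entries
print(round(bisectsteps), int(scansteps))  # 5 8
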
# The amount of time to wait between checking for new packs. This prevents an
# exception when data is moved to a new pack after the process has already
# loaded the pack list.
REFRESHRATE = 0.1

-if pycompat.isposix:
+if pycompat.isposix and not pycompat.ispy3:
    # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
    # The 'e' flag will be ignored on older versions of glibc.
+    # Python 3 can't handle the 'e' flag.
    PACKOPENMODE = 'rbe'
else:
    PACKOPENMODE = 'rb'

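This hunk is the entire change. On Python 2, open() forwards the mode string to fopen(3), so glibc 2.7+ honors the 'e' flag and older glibc ignores it; Python 3's io.open() validates the mode itself and rejects unknown characters. A standalone sketch of the failure being avoided (hypothetical path):

try:
    open('/tmp/some.pack', 'rbe')      # hypothetical path
except ValueError as ex:
    print(ex)                          # Python 3: invalid mode: 'rbe'

Nothing is lost by dropping the flag on Python 3: PEP 446 makes file descriptors non-inheritable by default there, which covers the close-on-exec behavior that 'e' was providing.
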
class _cachebackedpacks(object):
    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid a cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in the cache.
        # Therefore, add the pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = set(pack for pack in self._lrucache)
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None

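A quick sketch of the iteration contract (strings standing in for pack objects; not part of the file): whichever pack satisfied the previous lookup is served first on the next pass.

packs = _cachebackedpacks(['pack-a', 'pack-b', 'pack-c'], 2)

for pack in packs:          # cached packs first, then the rest
    if pack == 'pack-c':    # pretend pack-c had the data we wanted
        break               # pack-c is left recorded in self._lastpack

list(packs)                 # the next pass promotes pack-c toward the front
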
class basepackstore(object):
    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefresh is 0 so we'll immediately check for new packs on the
        # first failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (an index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size  # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                               sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise

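The see-it-twice trick above is easiest to check with concrete names. A standalone re-statement with hypothetical suffixes (real stores define INDEXSUFFIX and PACKSUFFIX):

def pairs(listing, indexsuffix='.histidx', packsuffix='.histpack'):
    ids = set()
    for name in listing:
        for suffix in (indexsuffix, packsuffix):
            if name.endswith(suffix):
                id = name[:-len(suffix)]
                if id in ids:
                    yield id        # second sighting: both halves exist
                else:
                    ids.add(id)     # first sighting: wait for the other file

print(list(pairs(['deadbeef.histidx', 'deadbeef.histpack', 'orphan.histidx'])))
# ['deadbeef'] -- the orphaned index file is never yielded, which quietly
# skips half-written packs
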
    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fully-written pack files, and
        not temporary files or other detritus in the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            'numpacks': count,
            'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

            # Ensures better performance of the cache by keeping the most
            # recently accessed pack at the beginning in subsequent
            # iterations.
            if not missing:
                return missing

        if missing:
            for pack in self.refresh():
                missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time
        it has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing()
        # on new objects), let's only actually check disk for new stuff every
        # once in a while. Generally this code path should only ever matter
        # when a repack is going on in the background, and it should be pretty
        # rare for that to happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = set(p.path for p in self.packs)
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks

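How markforrefresh() and refresh() interact, as a sketch (hypothetical store instance and keys; getmissing() above is the usual caller of refresh()):

store.markforrefresh()            # a repack finished: lastrefresh = 0

# The next miss rescans the directory immediately (now > 0 + REFRESHRATE
# always holds); after that, misses trigger at most one directory scan per
# REFRESHRATE (0.1s) window.
missing = store.getmissing(keys)
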
class versionmixin(object):
    # Mix-in for classes with multiple supported versions
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError('inconsistent version: %s' % version)
        else:
            raise RuntimeError('unsupported version: %s' % version)

class basepack(versionmixin):
    # The maximum amount we should read via mmap before remapping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024**2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory()  # initialize the mmap

        version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

    @util.propertycache
    def _fanouttable(self):
        params = self.params
        rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in pycompat.xrange(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        nodecount = struct.unpack_from('!Q', self._index,
                                       self.params.indexstart - 8)[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, PACKOPENMODE) as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(indexfp.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        with open(self.packpath, PACKOPENMODE) as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

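freememory() can close each file object as soon as its mapping exists because mmap duplicates the descriptor internally; the mapping outlives the file handle. A standalone sketch:

import mmap, tempfile

with tempfile.NamedTemporaryFile() as f:
    f.write(b'hello pack')
    f.flush()
    with open(f.name, 'rb') as fp:
        m = mmap.mmap(fp.fileno(), 0, access=mmap.ACCESS_READ)
    print(m[:5])    # b'hello' -- fp is closed, the mapping still reads
    m.close()
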
    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()

class mutablebasepack(versionmixin):

    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = 'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + '-tmp')
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + '-tmp')
        self.packfp = os.fdopen(self.packfp, r'wb+')
        self.idxfp = os.fdopen(self.idxfp, r'wb+')
        self.sha = hashlib.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files
        # created via mkstemp, so we must fix it ourselves. We can probably
        # fix this upstream in vfs.mkstemp so we don't need to use the
        # private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack('!B', self.VERSION)  # unsigned 1 byte int
        self.writeraw(versionbuf)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            self.close()
        else:
            self.abort()

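With __enter__/__exit__ defined this way, writers get commit-or-abort semantics for free. A sketch (mutablehistorypack and its add() signature belong to the subclasses, not this file):

with mutablehistorypack(ui, packdir) as pack:
    pack.add(filename, node, p1, p2, linknode, copyfrom)
# A clean exit ran close(): the temp files were renamed into place under
# their content sha1. An exception would have run abort() instead,
# unlinking the temp files.
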
    def abort(self):
        # Unclean exit
        self._cleantemppacks()

    def writeraw(self, data):
        self.packfp.write(data)
        self.sha.update(data)

    def close(self, ledger=None):
        if self._closed:
            return

        try:
            sha = nodemod.hex(self.sha.digest())
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result

    def _cleantemppacks(self):
        try:
            self.opener.unlink(self.packpath)
        except Exception:
            pass
        try:
            self.opener.unlink(self.idxpath)
        except Exception:
            pass

    def writeindex(self):
        rawindex = ''

        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(params.fanoutstruct,
                                      node[:params.fanoutprefix])[0]
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        rawfanouttable = ''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack('!I', offset)

        rawentrieslength = struct.pack('!Q', len(self.entries))

        # The index offset is its location in the file, i.e. right after the
        # 2-byte header and the fanout table.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()

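Taken together with _writeheader() below, the index layout for the small-fanout case works out as follows (a reviewer's sketch):

import struct

# bytes 0..1        '!BB'  version byte + config byte (large-fanout bit)
# bytes 2..1025     fanout table: 2^8 entries x 4 bytes ('!I' each)
# bytes 1026..1033  '!Q'   number of index entries
# bytes 1034..      index entries, INDEXENTRYLENGTH each, sorted by node
header = struct.pack('!BB', 2, 0)   # version 2, small-fanout bit clear
indexstart = 2 + 2**8 * 4 + 8       # = 1034, matches indexparams.indexstart
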
    def createindex(self, nodelocations, indexoffset):
        raise NotImplementedError()

    def _writeheader(self, indexparams):
        # Index header
        #   <version: 1 byte>
        #   <large fanout: 1 bit>  # 1 means 2^16, 0 means 2^8
        #   <unused: 7 bit>  # future use (compression, delta format, etc)
        config = 0
        if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
            config = 0b10000000
        self.idxfp.write(struct.pack('!BB', self.VERSION, config))

class indexparams(object):
    __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
                 r'fanoutsize', r'indexstart')

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format
        # that converts the node prefix into an integer location in the
        # fanout table).
        if prefixsize == SMALLFANOUTPREFIX:
            self.fanoutstruct = '!B'
        elif prefixsize == LARGEFANOUTPREFIX:
            self.fanoutstruct = '!H'
        else:
            raise ValueError("invalid fanout prefix size: %s" % prefixsize)

        # The number of fanout table entries
        self.fanoutcount = 2**(prefixsize * 8)

        # The total bytes used by the fanout table
        self.fanoutsize = self.fanoutcount * 4

        self.indexstart = FANOUTSTART + self.fanoutsize
        # Skip the index length
        self.indexstart += 8
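For concreteness, the two parameter sets this class produces (values derived from the code above):

small = indexparams(SMALLFANOUTPREFIX, 2)
# fanoutstruct '!B', fanoutcount 256, fanoutsize 1024, indexstart 1034

large = indexparams(LARGEFANOUTPREFIX, 2)
# fanoutstruct '!H', fanoutcount 65536, fanoutsize 262144, indexstart 262154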