remotefilelog: fix various minor py3 problems...
Augie Fackler
r40562:1419f780 default
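One of the Python 3 problems addressed below is the __slots__ declaration in the indexparams class (see the changed lines near the end of the first file). The sketch that follows is an editor's illustration, not part of the changeset: it assumes, as the r'' prefixes suggest, that Mercurial's Python 3 source loader rewrites unprefixed string literals into bytes literals, and bytes are not accepted as slot names on Python 3.

    # Illustrative only; _bytesslots and _strslots are hypothetical names.
    try:
        class _bytesslots(object):
            __slots__ = (b'fanoutprefix',)   # what a plain 'fanoutprefix' becomes
    except TypeError as err:
        print(err)   # e.g. "__slots__ items must be strings, not 'bytes'"

    class _strslots(object):
        __slots__ = (r'fanoutprefix',)       # r'...' stays a native str on both 2 and 3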
@@ -1,539 +1,539 @@
from __future__ import absolute_import

import collections
import errno
import hashlib
import mmap
import os
import struct
import time

from mercurial.i18n import _
from mercurial import (
    policy,
    pycompat,
    util,
    vfs as vfsmod,
)
from . import shallowutil

osutil = policy.importmod(r'osutil')

# The pack version supported by this implementation. This will need to be
# rev'd whenever the byte format changes. Ex: changing the fanout prefix,
# changing any of the int sizes, changing the delta algorithm, etc.
PACKVERSIONSIZE = 1
INDEXVERSIONSIZE = 2

FANOUTSTART = INDEXVERSIONSIZE

# Constant that indicates a fanout table entry hasn't been filled in. (This does
# not get serialized)
EMPTYFANOUT = -1

# The fanout prefix is the number of bytes that can be addressed by the fanout
# table. Example: a fanout prefix of 1 means we use the first byte of a hash to
# look in the fanout table (which will be 2^8 entries long).
SMALLFANOUTPREFIX = 1
LARGEFANOUTPREFIX = 2

# The number of entries in the index at which point we switch to a large fanout.
# It is chosen to balance the linear scan through a sparse fanout, with the
# size of the bisect in actual index.
# 2^16 / 8 was chosen because it trades off (1 step fanout scan + 5 step
# bisect) with (8 step fanout scan + 1 step bisect)
# 5 step bisect = log(2^16 / 8 / 255)  # fanout
# 10 step fanout scan = 2^16 / (2^16 / 8)  # fanout space divided by entries
SMALLFANOUTCUTOFF = 2**16 / 8

# The amount of time to wait between checking for new packs. This prevents an
# exception when data is moved to a new pack after the process has already
# loaded the pack list.
REFRESHRATE = 0.1

if pycompat.isposix:
    # With glibc 2.7+ the 'e' flag uses O_CLOEXEC when opening.
    # The 'e' flag will be ignored on older versions of glibc.
    PACKOPENMODE = 'rbe'
else:
    PACKOPENMODE = 'rb'

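# Editor's sketch, not part of the file: the fanout sizing implied by the
# constants above (each fanout entry is a 4-byte '!I' value).
assert 2 ** (SMALLFANOUTPREFIX * 8) == 256      # small fanout: 256 entries, 1KB
assert 2 ** (LARGEFANOUTPREFIX * 8) == 65536    # large fanout: 64K entries, 256KB
assert SMALLFANOUTCUTOFF == 2 ** 16 / 8         # switch once an index holds >8192 entries
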
class _cachebackedpacks(object):
    def __init__(self, packs, cachesize):
        self._packs = set(packs)
        self._lrucache = util.lrucachedict(cachesize)
        self._lastpack = None

        # Avoid cold start of the cache by populating the most recent packs
        # in the cache.
        for i in reversed(range(min(cachesize, len(packs)))):
            self._movetofront(packs[i])

    def _movetofront(self, pack):
        # This effectively makes pack the first entry in the cache.
        self._lrucache[pack] = True

    def _registerlastpackusage(self):
        if self._lastpack is not None:
            self._movetofront(self._lastpack)
            self._lastpack = None

    def add(self, pack):
        self._registerlastpackusage()

        # This method will mostly be called when packs are not in cache.
        # Therefore, adding pack to the cache.
        self._movetofront(pack)
        self._packs.add(pack)

    def __iter__(self):
        self._registerlastpackusage()

        # Cache iteration is based on LRU.
        for pack in self._lrucache:
            self._lastpack = pack
            yield pack

        cachedpacks = set(pack for pack in self._lrucache)
        # Yield for paths not in the cache.
        for pack in self._packs - cachedpacks:
            self._lastpack = pack
            yield pack

        # Data not found in any pack.
        self._lastpack = None

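# Editor's sketch, not part of the file: the iteration contract of
# _cachebackedpacks, assuming the class is importable (plain strings stand in
# for pack objects here):
#
#     packs = _cachebackedpacks(['a', 'b', 'c', 'd'], cachesize=2)
#     list(packs)   # recently used (cached) packs first, then the remainder
#
# A caller that stops iterating after a hit leaves that pack in _lastpack, and
# the next add() or iteration moves it to the front of the LRU cache.
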
class basepackstore(object):
    # Default cache size limit for the pack files.
    DEFAULTCACHESIZE = 100

    def __init__(self, ui, path):
        self.ui = ui
        self.path = path

        # lastrefesh is 0 so we'll immediately check for new packs on the first
        # failure.
        self.lastrefresh = 0

        packs = []
        for filepath, __, __ in self._getavailablepackfilessorted():
            try:
                pack = self.getpack(filepath)
            except Exception as ex:
                # An exception may be thrown if the pack file is corrupted
                # somehow. Log a warning but keep going in this case, just
                # skipping this pack file.
                #
                # If this is an ENOENT error then don't even bother logging.
                # Someone could have removed the file since we retrieved the
                # list of paths.
                if getattr(ex, 'errno', None) != errno.ENOENT:
                    ui.warn(_('unable to load pack %s: %s\n') % (filepath, ex))
                continue
            packs.append(pack)

        self.packs = _cachebackedpacks(packs, self.DEFAULTCACHESIZE)

    def _getavailablepackfiles(self):
        """For each pack file (a index/data file combo), yields:
          (full path without extension, mtime, size)

        mtime will be the mtime of the index/data file (whichever is newer)
        size is the combined size of index/data file
        """
        indexsuffixlen = len(self.INDEXSUFFIX)
        packsuffixlen = len(self.PACKSUFFIX)

        ids = set()
        sizes = collections.defaultdict(lambda: 0)
        mtimes = collections.defaultdict(lambda: [])
        try:
            for filename, type, stat in osutil.listdir(self.path, stat=True):
                id = None
                if filename[-indexsuffixlen:] == self.INDEXSUFFIX:
                    id = filename[:-indexsuffixlen]
                elif filename[-packsuffixlen:] == self.PACKSUFFIX:
                    id = filename[:-packsuffixlen]

                # Since we expect to have two files corresponding to each ID
                # (the index file and the pack file), we can yield once we see
                # it twice.
                if id:
                    sizes[id] += stat.st_size # Sum both files' sizes together
                    mtimes[id].append(stat.st_mtime)
                    if id in ids:
                        yield (os.path.join(self.path, id), max(mtimes[id]),
                               sizes[id])
                    else:
                        ids.add(id)
        except OSError as ex:
            if ex.errno != errno.ENOENT:
                raise

    def _getavailablepackfilessorted(self):
        """Like `_getavailablepackfiles`, but also sorts the files by mtime,
        yielding newest files first.

        This is desirable, since it is more likely newer packfiles have more
        desirable data.
        """
        files = []
        for path, mtime, size in self._getavailablepackfiles():
            files.append((mtime, size, path))
        files = sorted(files, reverse=True)
        for mtime, size, path in files:
            yield path, mtime, size

    def gettotalsizeandcount(self):
        """Returns the total disk size (in bytes) of all the pack files in
        this store, and the count of pack files.

        (This might be smaller than the total size of the ``self.path``
        directory, since this only considers fuly-writen pack files, and not
        temporary files or other detritus on the directory.)
        """
        totalsize = 0
        count = 0
        for __, __, size in self._getavailablepackfiles():
            totalsize += size
            count += 1
        return totalsize, count

    def getmetrics(self):
        """Returns metrics on the state of this store."""
        size, count = self.gettotalsizeandcount()
        return {
            'numpacks': count,
            'totalpacksize': size,
        }

    def getpack(self, path):
        raise NotImplementedError()

    def getmissing(self, keys):
        missing = keys
        for pack in self.packs:
            missing = pack.getmissing(missing)

            # Ensures better performance of the cache by keeping the most
            # recently accessed pack at the beginning in subsequent iterations.
            if not missing:
                return missing

        if missing:
            for pack in self.refresh():
                missing = pack.getmissing(missing)

        return missing

    def markledger(self, ledger, options=None):
        for pack in self.packs:
            pack.markledger(ledger)

    def markforrefresh(self):
        """Tells the store that there may be new pack files, so the next time it
        has a lookup miss it should check for new files."""
        self.lastrefresh = 0

    def refresh(self):
        """Checks for any new packs on disk, adds them to the main pack list,
        and returns a list of just the new packs."""
        now = time.time()

        # If we experience a lot of misses (like in the case of getmissing() on
        # new objects), let's only actually check disk for new stuff every once
        # in a while. Generally this code path should only ever matter when a
        # repack is going on in the background, and that should be pretty rare
        # to have that happen twice in quick succession.
        newpacks = []
        if now > self.lastrefresh + REFRESHRATE:
            self.lastrefresh = now
            previous = set(p.path for p in self.packs)
            for filepath, __, __ in self._getavailablepackfilessorted():
                if filepath not in previous:
                    newpack = self.getpack(filepath)
                    newpacks.append(newpack)
                    self.packs.add(newpack)

        return newpacks

class versionmixin(object):
    # Mix-in for classes with multiple supported versions
    VERSION = None
    SUPPORTED_VERSIONS = [2]

    def _checkversion(self, version):
        if version in self.SUPPORTED_VERSIONS:
            if self.VERSION is None:
                # only affect this instance
                self.VERSION = version
            elif self.VERSION != version:
                raise RuntimeError('inconsistent version: %s' % version)
        else:
            raise RuntimeError('unsupported version: %s' % version)

class basepack(versionmixin):
    # The maximum amount we should read via mmap before remmaping so the old
    # pages can be released (100MB)
    MAXPAGEDIN = 100 * 1024**2

    SUPPORTED_VERSIONS = [2]

    def __init__(self, path):
        self.path = path
        self.packpath = path + self.PACKSUFFIX
        self.indexpath = path + self.INDEXSUFFIX

        self.indexsize = os.stat(self.indexpath).st_size
        self.datasize = os.stat(self.packpath).st_size

        self._index = None
        self._data = None
        self.freememory() # initialize the mmap

        version = struct.unpack('!B', self._data[:PACKVERSIONSIZE])[0]
        self._checkversion(version)

        version, config = struct.unpack('!BB', self._index[:INDEXVERSIONSIZE])
        self._checkversion(version)

        if 0b10000000 & config:
            self.params = indexparams(LARGEFANOUTPREFIX, version)
        else:
            self.params = indexparams(SMALLFANOUTPREFIX, version)

    @util.propertycache
    def _fanouttable(self):
        params = self.params
        rawfanout = self._index[FANOUTSTART:FANOUTSTART + params.fanoutsize]
        fanouttable = []
        for i in pycompat.xrange(0, params.fanoutcount):
            loc = i * 4
            fanoutentry = struct.unpack('!I', rawfanout[loc:loc + 4])[0]
            fanouttable.append(fanoutentry)
        return fanouttable

    @util.propertycache
    def _indexend(self):
        nodecount = struct.unpack_from('!Q', self._index,
                                       self.params.indexstart - 8)[0]
        return self.params.indexstart + nodecount * self.INDEXENTRYLENGTH

    def freememory(self):
        """Unmap and remap the memory to free it up after known expensive
        operations. Return True if self._data and self._index were reloaded.
        """
        if self._index:
            if self._pagedin < self.MAXPAGEDIN:
                return False

            self._index.close()
            self._data.close()

        # TODO: use an opener/vfs to access these paths
        with open(self.indexpath, PACKOPENMODE) as indexfp:
            # memory-map the file, size 0 means whole file
            self._index = mmap.mmap(indexfp.fileno(), 0,
                                    access=mmap.ACCESS_READ)
        with open(self.packpath, PACKOPENMODE) as datafp:
            self._data = mmap.mmap(datafp.fileno(), 0, access=mmap.ACCESS_READ)

        self._pagedin = 0
        return True

    def getmissing(self, keys):
        raise NotImplementedError()

    def markledger(self, ledger, options=None):
        raise NotImplementedError()

    def cleanup(self, ledger):
        raise NotImplementedError()

    def __iter__(self):
        raise NotImplementedError()

    def iterentries(self):
        raise NotImplementedError()

class mutablebasepack(versionmixin):

    def __init__(self, ui, packdir, version=2):
        self._checkversion(version)
        # TODO(augie): make this configurable
        self._compressor = 'GZ'
        opener = vfsmod.vfs(packdir)
        opener.createmode = 0o444
        self.opener = opener

        self.entries = {}

        shallowutil.mkstickygroupdir(ui, packdir)
        self.packfp, self.packpath = opener.mkstemp(
            suffix=self.PACKSUFFIX + '-tmp')
        self.idxfp, self.idxpath = opener.mkstemp(
            suffix=self.INDEXSUFFIX + '-tmp')
        self.packfp = os.fdopen(self.packfp, 'w+')
        self.idxfp = os.fdopen(self.idxfp, 'w+')
        self.sha = hashlib.sha1()
        self._closed = False

        # The opener provides no way of doing permission fixup on files created
        # via mkstemp, so we must fix it ourselves. We can probably fix this
        # upstream in vfs.mkstemp so we don't need to use the private method.
        opener._fixfilemode(opener.join(self.packpath))
        opener._fixfilemode(opener.join(self.idxpath))

        # Write header
        # TODO: make it extensible (ex: allow specifying compression algorithm,
        # a flexible key/value header, delta algorithm, fanout size, etc)
        versionbuf = struct.pack('!B', self.VERSION) # unsigned 1 byte int
        self.writeraw(versionbuf)

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        if exc_type is None:
            self.close()
        else:
            self.abort()

    def abort(self):
        # Unclean exit
        self._cleantemppacks()

    def writeraw(self, data):
        self.packfp.write(data)
        self.sha.update(data)

    def close(self, ledger=None):
        if self._closed:
            return

        try:
            sha = self.sha.hexdigest()
            self.packfp.close()
            self.writeindex()

            if len(self.entries) == 0:
                # Empty pack
                self._cleantemppacks()
                self._closed = True
                return None

            self.opener.rename(self.packpath, sha + self.PACKSUFFIX)
            try:
                self.opener.rename(self.idxpath, sha + self.INDEXSUFFIX)
            except Exception as ex:
                try:
                    self.opener.unlink(sha + self.PACKSUFFIX)
                except Exception:
                    pass
                # Throw exception 'ex' explicitly since a normal 'raise' would
                # potentially throw an exception from the unlink cleanup.
                raise ex
        except Exception:
            # Clean up temp packs in all exception cases
            self._cleantemppacks()
            raise

        self._closed = True
        result = self.opener.join(sha)
        if ledger:
            ledger.addcreated(result)
        return result

    def _cleantemppacks(self):
        try:
            self.opener.unlink(self.packpath)
        except Exception:
            pass
        try:
            self.opener.unlink(self.idxpath)
        except Exception:
            pass

    def writeindex(self):
        rawindex = ''

        largefanout = len(self.entries) > SMALLFANOUTCUTOFF
        if largefanout:
            params = indexparams(LARGEFANOUTPREFIX, self.VERSION)
        else:
            params = indexparams(SMALLFANOUTPREFIX, self.VERSION)

        fanouttable = [EMPTYFANOUT] * params.fanoutcount

        # Precompute the location of each entry
        locations = {}
        count = 0
        for node in sorted(self.entries.iterkeys()):
            location = count * self.INDEXENTRYLENGTH
            locations[node] = location
            count += 1

            # Must use [0] on the unpack result since it's always a tuple.
            fanoutkey = struct.unpack(params.fanoutstruct,
                                      node[:params.fanoutprefix])[0]
            if fanouttable[fanoutkey] == EMPTYFANOUT:
                fanouttable[fanoutkey] = location

        rawfanouttable = ''
        last = 0
        for offset in fanouttable:
            offset = offset if offset != EMPTYFANOUT else last
            last = offset
            rawfanouttable += struct.pack('!I', offset)

        rawentrieslength = struct.pack('!Q', len(self.entries))

        # The index offset is the it's location in the file. So after the 2 byte
        # header and the fanouttable.
        rawindex = self.createindex(locations, 2 + len(rawfanouttable))

        self._writeheader(params)
        self.idxfp.write(rawfanouttable)
        self.idxfp.write(rawentrieslength)
        self.idxfp.write(rawindex)
        self.idxfp.close()

    def createindex(self, nodelocations):
        raise NotImplementedError()

    def _writeheader(self, indexparams):
        # Index header
        #    <version: 1 byte>
        #    <large fanout: 1 bit>  # 1 means 2^16, 0 means 2^8
        #    <unused: 7 bit>  # future use (compression, delta format, etc)
        config = 0
        if indexparams.fanoutprefix == LARGEFANOUTPREFIX:
            config = 0b10000000
        self.idxfp.write(struct.pack('!BB', self.VERSION, config))

class indexparams(object):
-    __slots__ = ('fanoutprefix', 'fanoutstruct', 'fanoutcount', 'fanoutsize',
-                 'indexstart')
+    __slots__ = (r'fanoutprefix', r'fanoutstruct', r'fanoutcount',
+                 r'fanoutsize', r'indexstart')

    def __init__(self, prefixsize, version):
        self.fanoutprefix = prefixsize

        # The struct pack format for fanout table location (i.e. the format that
        # converts the node prefix into an integer location in the fanout
        # table).
        if prefixsize == SMALLFANOUTPREFIX:
            self.fanoutstruct = '!B'
        elif prefixsize == LARGEFANOUTPREFIX:
            self.fanoutstruct = '!H'
        else:
            raise ValueError("invalid fanout prefix size: %s" % prefixsize)

        # The number of fanout table entries
        self.fanoutcount = 2**(prefixsize * 8)

        # The total bytes used by the fanout table
        self.fanoutsize = self.fanoutcount * 4

        self.indexstart = FANOUTSTART + self.fanoutsize
        # Skip the index length
        self.indexstart += 8
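As an editor's aside (not part of the changeset), the index layout written by mutablebasepack.writeindex() and _writeheader() above can be read back roughly as follows; the function and variable names here are hypothetical:

    import struct

    def readindexheader(raw):
        # <version: 1 byte> <config: 1 byte>; the 0b10000000 bit selects the
        # large (2-byte prefix, 2**16 entry) fanout table.
        version, config = struct.unpack('!BB', raw[:2])
        fanoutprefix = 2 if config & 0b10000000 else 1
        fanoutcount = 2 ** (fanoutprefix * 8)
        fanoutsize = fanoutcount * 4
        fanout = struct.unpack('!%dI' % fanoutcount, raw[2:2 + fanoutsize])
        entrycount = struct.unpack_from('!Q', raw, 2 + fanoutsize)[0]
        indexstart = 2 + fanoutsize + 8    # matches indexparams.indexstart
        return version, fanout, entrycount, indexstart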
@@ -1,781 +1,781 @@
from __future__ import absolute_import

import os
import time

from mercurial.i18n import _
from mercurial.node import (
    nullid,
    short,
)
from mercurial import (
    encoding,
    error,
    mdiff,
    policy,
    pycompat,
    scmutil,
    util,
    vfs,
)
from mercurial.utils import procutil
from . import (
    constants,
    contentstore,
    datapack,
    extutil,
    historypack,
    metadatastore,
    shallowutil,
)

osutil = policy.importmod(r'osutil')

class RepackAlreadyRunning(error.Abort):
    pass

if util.safehasattr(util, '_hgexecutable'):
    # Before 5be286db
    _hgexecutable = util.hgexecutable
else:
    from mercurial.utils import procutil
    _hgexecutable = procutil.hgexecutable

def backgroundrepack(repo, incremental=True, packsonly=False):
    cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']
    msg = _("(running background repack)\n")
    if incremental:
        cmd.append('--incremental')
        msg = _("(running background incremental repack)\n")
    if packsonly:
        cmd.append('--packsonly')
    repo.ui.warn(msg)
    procutil.runbgcommand(cmd, encoding.environ)

def fullrepack(repo, options=None):
    """If ``packsonly`` is True, stores creating only loose objects are skipped.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(
            *repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores,
            allowincomplete=True)

        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _runrepack(repo, datasource, historysource, packpath,
                   constants.FILEPACK_CATEGORY, options=options)

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, spackpath,
                   constants.TREEPACK_CATEGORY, options=options)

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores,
            allowincomplete=True)
        historysource = metadatastore.unionmetadatastore(
            *lhstores,
            allowincomplete=True)
        _runrepack(repo, datasource, historysource, lpackpath,
                   constants.TREEPACK_CATEGORY, options=options)

def incrementalrepack(repo, options=None):
    """This repacks the repo by looking at the distribution of pack files in the
    repo and performing the most minimal repack to keep the repo in good shape.
    """
    if util.safehasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo,
            constants.FILEPACK_CATEGORY)
        _incrementalrepack(repo,
                           repo.shareddatastores,
                           repo.sharedhistorystores,
                           packpath,
                           constants.FILEPACK_CATEGORY,
                           options=options)

    if util.safehasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(repo,
                           sdstores,
                           shstores,
                           spackpath,
                           constants.TREEPACK_CATEGORY,
                           options=options)

        # Repack the local manifest store
        _incrementalrepack(repo,
                           ldstores,
                           lhstores,
                           lpackpath,
                           constants.TREEPACK_CATEGORY,
                           allowincompletedata=True,
                           options=options)

def _getmanifeststores(repo):
    shareddatastores = repo.manifestlog.shareddatastores
    localdatastores = repo.manifestlog.localdatastores
    sharedhistorystores = repo.manifestlog.sharedhistorystores
    localhistorystores = repo.manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(repo,
                                                  constants.TREEPACK_CATEGORY)
    localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
                                                 constants.TREEPACK_CATEGORY)

    return ((localpackpath, localdatastores, localhistorystores),
            (sharedpackpath, shareddatastores, sharedhistorystores))

def _topacks(packpath, files, constructor):
    paths = list(os.path.join(packpath, p) for p in files)
    packs = list(constructor(p) for p in paths)
    return packs

def _deletebigpacks(repo, folder, files):
    """Deletes packfiles that are bigger than ``packs.maxpacksize``.

    Returns ``files` with the removed files omitted."""
    maxsize = repo.ui.configbytes("packs", "maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [".datapack", ".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack:
    oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
                     if (stat.st_size > maxsize and (os.path.splitext(path)[1]
                                                     in VALIDEXTS))])

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug('removing oversize packfile %s (%s)\n' %
                          (path, util.bytecount(os.stat(path).st_size)))
            os.unlink(path)
    return [row for row in files if os.path.basename(row[0]) not in oversized]

def _incrementalrepack(repo, datastore, historystore, packpath, category,
                       allowincompletedata=False, options=None):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)
    files = _deletebigpacks(repo, packpath, files)
    datapacks = _topacks(packpath,
                         _computeincrementaldatapack(repo.ui, files),
                         datapack.datapack)
    datapacks.extend(s for s in datastore
                     if not isinstance(s, datapack.datapackstore))

    historypacks = _topacks(packpath,
                            _computeincrementalhistorypack(repo.ui, files),
                            historypack.historypack)
    historypacks.extend(s for s in historystore
                        if not isinstance(s, historypack.historypackstore))

    # ``allhistory{files,packs}`` contains all known history packs, even ones we
    # don't plan to repack. They are used during the datapack repack to ensure
    # good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
                                              historypack.INDEXSUFFIX)
    allhistorypacks = _topacks(packpath,
                               (f for f, mode, stat in allhistoryfiles),
                               historypack.historypack)
    allhistorypacks.extend(s for s in historystore
                           if not isinstance(s, historypack.historypackstore))
    _runrepack(repo,
               contentstore.unioncontentstore(
                   *datapacks,
                   allowincomplete=allowincompletedata),
               metadatastore.unionmetadatastore(
                   *historypacks,
                   allowincomplete=True),
               packpath, category,
               fullhistory=metadatastore.unionmetadatastore(
                   *allhistorypacks,
                   allowincomplete=True),
               options=options)

def _computeincrementaldatapack(ui, files):
    opts = {
        'gencountlimit' : ui.configint(
            'remotefilelog', 'data.gencountlimit'),
        'generations' : ui.configlist(
            'remotefilelog', 'data.generations'),
        'maxrepackpacks' : ui.configint(
            'remotefilelog', 'data.maxrepackpacks'),
        'repackmaxpacksize' : ui.configbytes(
            'remotefilelog', 'data.repackmaxpacksize'),
        'repacksizelimit' : ui.configbytes(
            'remotefilelog', 'data.repacksizelimit'),
    }

    packfiles = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
    return _computeincrementalpack(packfiles, opts)

def _computeincrementalhistorypack(ui, files):
    opts = {
        'gencountlimit' : ui.configint(
            'remotefilelog', 'history.gencountlimit'),
        'generations' : ui.configlist(
            'remotefilelog', 'history.generations', ['100MB']),
        'maxrepackpacks' : ui.configint(
            'remotefilelog', 'history.maxrepackpacks'),
        'repackmaxpacksize' : ui.configbytes(
            'remotefilelog', 'history.repackmaxpacksize', '400MB'),
        'repacksizelimit' : ui.configbytes(
            'remotefilelog', 'history.repacksizelimit'),
    }

    packfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
    return _computeincrementalpack(packfiles, opts)

def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
    result = []
    fileset = set(fn for fn, mode, stat in files)
    for filename, mode, stat in files:
        if not filename.endswith(packsuffix):
            continue

        prefix = filename[:-len(packsuffix)]

        # Don't process a pack if it doesn't have an index.
        if (prefix + indexsuffix) not in fileset:
            continue
        result.append((prefix, mode, stat))

    return result

def _computeincrementalpack(files, opts):
    """Given a set of pack files along with the configuration options, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap (i.e.
    packing small things when possible, and rolling the packs up to the big ones
    over time).
    """

    limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
                         reverse=True))
    limits.append(0)

    # Group the packs by generation (i.e. by size)
    generations = []
    for i in pycompat.xrange(len(limits)):
        generations.append([])

    sizes = {}
    for prefix, mode, stat in files:
        size = stat.st_size
        if size > opts['repackmaxpacksize']:
            continue

        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break

    # Steps for picking what packs to repack:
    # 1. Pick the largest generation with > gencountlimit pack files.
    # 2. Take the smallest three packs.
    # 3. While total-size-of-packs < repacksizelimit: add another pack

    # Find the largest generation with more than gencountlimit packs
    genpacks = []
    for i, limit in enumerate(limits):
        if len(generations[i]) > opts['gencountlimit']:
            # Sort to be smallest last, for easy popping later
            genpacks.extend(sorted(generations[i], reverse=True,
                                   key=lambda x: sizes[x]))
            break

    # Take as many packs from the generation as we can
    chosenpacks = genpacks[-3:]
    genpacks = genpacks[:-3]
    repacksize = sum(sizes[n] for n in chosenpacks)
    while (repacksize < opts['repacksizelimit'] and genpacks and
           len(chosenpacks) < opts['maxrepackpacks']):
        chosenpacks.append(genpacks.pop())
        repacksize += sizes[chosenpacks[-1]]

    return chosenpacks

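# Editor's sketch, not part of the file: a toy run of _computeincrementalpack()
# above. The sizes and limits are made up; fakestat stands in for the stat
# objects osutil.listdir() would return.
#
#     import collections
#     fakestat = collections.namedtuple('fakestat', 'st_size')
#     files = [('pack%d' % i, 0, fakestat(sz))
#              for i, sz in enumerate([10, 20, 30, 40, 2000, 3000])]
#     opts = {'gencountlimit': 2, 'generations': ['1KB'],
#             'maxrepackpacks': 50, 'repackmaxpacksize': 100 * 2**20,
#             'repacksizelimit': 100 * 2**10}
#     _computeincrementalpack(files, opts)
#
# The sub-1KB generation holds four packs, which exceeds gencountlimit, so the
# three smallest are picked first and the fourth still fits under
# repacksizelimit; the two large packs are left alone.
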
326 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
326 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
327 options=None):
327 options=None):
328 shallowutil.mkstickygroupdir(repo.ui, packpath)
328 shallowutil.mkstickygroupdir(repo.ui, packpath)
329
329
330 def isold(repo, filename, node):
330 def isold(repo, filename, node):
331 """Check if the file node is older than a limit.
331 """Check if the file node is older than a limit.
332 Unless a limit is specified in the config the default limit is taken.
332 Unless a limit is specified in the config the default limit is taken.
333 """
333 """
334 filectx = repo.filectx(filename, fileid=node)
334 filectx = repo.filectx(filename, fileid=node)
335 filetime = repo[filectx.linkrev()].date()
335 filetime = repo[filectx.linkrev()].date()
336
336
337 ttl = repo.ui.configint('remotefilelog', 'nodettl')
337 ttl = repo.ui.configint('remotefilelog', 'nodettl')
338
338
339 limit = time.time() - ttl
339 limit = time.time() - ttl
340 return filetime[0] < limit
340 return filetime[0] < limit
341
341
342 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
342 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
343 if not fullhistory:
343 if not fullhistory:
344 fullhistory = history
344 fullhistory = history
345 packer = repacker(repo, data, history, fullhistory, category,
345 packer = repacker(repo, data, history, fullhistory, category,
346 gc=garbagecollect, isold=isold, options=options)
346 gc=garbagecollect, isold=isold, options=options)
347
347
348 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
348 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
349 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
349 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
350 try:
350 try:
351 packer.run(dpack, hpack)
351 packer.run(dpack, hpack)
352 except error.LockHeld:
352 except error.LockHeld:
353 raise RepackAlreadyRunning(_("skipping repack - another repack "
353 raise RepackAlreadyRunning(_("skipping repack - another repack "
354 "is already running"))
354 "is already running"))
355
355
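The isold() helper above reduces to a time-to-live check on the commit that introduced the file revision. A stripped-down version, with the TTL passed in directly instead of read from remotefilelog.nodettl; the 14-day figure is only an illustrative assumption, not the extension's default:

import time

def isold_sketch(commitdate, nodettl=14 * 24 * 3600):
    """commitdate is a (unixtime, tzoffset) pair as returned by ctx.date()."""
    limit = time.time() - nodettl
    return commitdate[0] < limit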
356 def keepset(repo, keyfn, lastkeepkeys=None):
356 def keepset(repo, keyfn, lastkeepkeys=None):
357 """Computes a keepset which is not garbage collected.
357 """Computes a keepset which is not garbage collected.
358 'keyfn' is a function that maps filename, node to a unique key.
358 'keyfn' is a function that maps filename, node to a unique key.
359 'lastkeepkeys' is an optional argument and if provided the keepset
359 'lastkeepkeys' is an optional argument and if provided the keepset
360 function updates lastkeepkeys with more keys and returns the result.
360 function updates lastkeepkeys with more keys and returns the result.
361 """
361 """
362 if not lastkeepkeys:
362 if not lastkeepkeys:
363 keepkeys = set()
363 keepkeys = set()
364 else:
364 else:
365 keepkeys = lastkeepkeys
365 keepkeys = lastkeepkeys
366
366
367 # We want to keep:
367 # We want to keep:
368 # 1. Working copy parent
368 # 1. Working copy parent
369 # 2. Draft commits
369 # 2. Draft commits
370 # 3. Parents of draft commits
370 # 3. Parents of draft commits
371 # 4. Pullprefetch and bgprefetchrevs revsets if specified
371 # 4. Pullprefetch and bgprefetchrevs revsets if specified
372 revs = ['.', 'draft()', 'parents(draft())']
372 revs = ['.', 'draft()', 'parents(draft())']
373 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
373 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
374 if prefetchrevs:
374 if prefetchrevs:
375 revs.append('(%s)' % prefetchrevs)
375 revs.append('(%s)' % prefetchrevs)
376 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
376 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
377 if prefetchrevs:
377 if prefetchrevs:
378 revs.append('(%s)' % prefetchrevs)
378 revs.append('(%s)' % prefetchrevs)
379 revs = '+'.join(revs)
379 revs = '+'.join(revs)
380
380
381 revs = ['sort((%s), "topo")' % revs]
381 revs = ['sort((%s), "topo")' % revs]
382 keep = scmutil.revrange(repo, revs)
382 keep = scmutil.revrange(repo, revs)
383
383
384 processed = set()
384 processed = set()
385 lastmanifest = None
385 lastmanifest = None
386
386
387 # process the commits in toposorted order starting from the oldest
387 # process the commits in toposorted order starting from the oldest
388 for r in reversed(keep._list):
388 for r in reversed(keep._list):
389 if repo[r].p1().rev() in processed:
389 if repo[r].p1().rev() in processed:
390 # if the direct parent has already been processed
390 # if the direct parent has already been processed
391 # then we only need to process the delta
391 # then we only need to process the delta
392 m = repo[r].manifestctx().readdelta()
392 m = repo[r].manifestctx().readdelta()
393 else:
393 else:
394 # otherwise take the manifest and diff it
394 # otherwise take the manifest and diff it
395 # with the previous manifest if one exists
395 # with the previous manifest if one exists
396 if lastmanifest:
396 if lastmanifest:
397 m = repo[r].manifest().diff(lastmanifest)
397 m = repo[r].manifest().diff(lastmanifest)
398 else:
398 else:
399 m = repo[r].manifest()
399 m = repo[r].manifest()
400 lastmanifest = repo[r].manifest()
400 lastmanifest = repo[r].manifest()
401 processed.add(r)
401 processed.add(r)
402
402
403 # populate keepkeys with keys from the current manifest
403 # populate keepkeys with keys from the current manifest
404 if type(m) is dict:
404 if type(m) is dict:
405 # m is a result of diff of two manifests and is a dictionary that
405 # m is a result of diff of two manifests and is a dictionary that
406 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
406 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
407 for filename, diff in m.iteritems():
407 for filename, diff in m.iteritems():
408 if diff[0][0] is not None:
408 if diff[0][0] is not None:
409 keepkeys.add(keyfn(filename, diff[0][0]))
409 keepkeys.add(keyfn(filename, diff[0][0]))
410 else:
410 else:
411 # m is a manifest object
411 # m is a manifest object
412 for filename, filenode in m.iteritems():
412 for filename, filenode in m.iteritems():
413 keepkeys.add(keyfn(filename, filenode))
413 keepkeys.add(keyfn(filename, filenode))
414
414
415 return keepkeys
415 return keepkeys
416
416
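The manifest handling at the end of keepset() switches on whether m is a diff (a plain dict mapping filename to ((newnode, newflag), (oldnode, oldflag))) or a full manifest (mapping filename to node). A self-contained sketch of that key-collection step, with plain dicts standing in for real manifest objects and an explicit isdiff flag replacing the type check:

def collectkeys(m, keyfn, keepkeys, isdiff):
    if isdiff:
        # manifest diff: filename -> ((newnode, newflag), (oldnode, oldflag))
        for filename, ((newnode, newflag), old) in m.items():
            if newnode is not None:
                keepkeys.add(keyfn(filename, newnode))
    else:
        # full manifest: filename -> filenode
        for filename, filenode in m.items():
            keepkeys.add(keyfn(filename, filenode))
    return keepkeys

keep = collectkeys({'a.txt': 'node1'}, lambda f, n: (f, n), set(), isdiff=False)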
417 class repacker(object):
417 class repacker(object):
418 """Class for orchestrating the repack of data and history information into a
418 """Class for orchestrating the repack of data and history information into a
419 new format.
419 new format.
420 """
420 """
421 def __init__(self, repo, data, history, fullhistory, category, gc=False,
421 def __init__(self, repo, data, history, fullhistory, category, gc=False,
422 isold=None, options=None):
422 isold=None, options=None):
423 self.repo = repo
423 self.repo = repo
424 self.data = data
424 self.data = data
425 self.history = history
425 self.history = history
426 self.fullhistory = fullhistory
426 self.fullhistory = fullhistory
427 self.unit = constants.getunits(category)
427 self.unit = constants.getunits(category)
428 self.garbagecollect = gc
428 self.garbagecollect = gc
429 self.options = options
429 self.options = options
430 if self.garbagecollect:
430 if self.garbagecollect:
431 if not isold:
431 if not isold:
432 raise ValueError("Function 'isold' is not properly specified")
432 raise ValueError("Function 'isold' is not properly specified")
433 # use (filename, node) tuple as a keepset key
433 # use (filename, node) tuple as a keepset key
434 self.keepkeys = keepset(repo, lambda f, n : (f, n))
434 self.keepkeys = keepset(repo, lambda f, n : (f, n))
435 self.isold = isold
435 self.isold = isold
436
436
437 def run(self, targetdata, targethistory):
437 def run(self, targetdata, targethistory):
438 ledger = repackledger()
438 ledger = repackledger()
439
439
440 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
440 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
441 _('repacking %s') % self.repo.origroot, timeout=0):
441 _('repacking %s') % self.repo.origroot, timeout=0):
442 self.repo.hook('prerepack')
442 self.repo.hook('prerepack')
443
443
444 # Populate ledger from source
444 # Populate ledger from source
445 self.data.markledger(ledger, options=self.options)
445 self.data.markledger(ledger, options=self.options)
446 self.history.markledger(ledger, options=self.options)
446 self.history.markledger(ledger, options=self.options)
447
447
448 # Run repack
448 # Run repack
449 self.repackdata(ledger, targetdata)
449 self.repackdata(ledger, targetdata)
450 self.repackhistory(ledger, targethistory)
450 self.repackhistory(ledger, targethistory)
451
451
452 # Call cleanup on each source
452 # Call cleanup on each source
453 for source in ledger.sources:
453 for source in ledger.sources:
454 source.cleanup(ledger)
454 source.cleanup(ledger)
455
455
456 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
456 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
457 """Reorders ``orphans`` into a single chain inside ``nodes`` and
457 """Reorders ``orphans`` into a single chain inside ``nodes`` and
458 ``deltabases``.
458 ``deltabases``.
459
459
460 We often have orphan entries (nodes without a base that aren't
460 We often have orphan entries (nodes without a base that aren't
461 referenced by other nodes -- i.e., not part of a chain) due to gaps in
461 referenced by other nodes -- i.e., not part of a chain) due to gaps in
462 history. Rather than store them as individual fulltexts, we prefer to
462 history. Rather than store them as individual fulltexts, we prefer to
463 insert them as one chain sorted by size.
463 insert them as one chain sorted by size.
464 """
464 """
465 if not orphans:
465 if not orphans:
466 return nodes
466 return nodes
467
467
468 def getsize(node, default=0):
468 def getsize(node, default=0):
469 meta = self.data.getmeta(filename, node)
469 meta = self.data.getmeta(filename, node)
470 if constants.METAKEYSIZE in meta:
470 if constants.METAKEYSIZE in meta:
471 return meta[constants.METAKEYSIZE]
471 return meta[constants.METAKEYSIZE]
472 else:
472 else:
473 return default
473 return default
474
474
475 # Sort orphans by size; biggest first is preferred, since it's more
475 # Sort orphans by size; biggest first is preferred, since it's more
476 # likely to be the newest version assuming files grow over time.
476 # likely to be the newest version assuming files grow over time.
477 # (Sort by node first to ensure the sort is stable.)
477 # (Sort by node first to ensure the sort is stable.)
478 orphans = sorted(orphans)
478 orphans = sorted(orphans)
479 orphans = list(sorted(orphans, key=getsize, reverse=True))
479 orphans = list(sorted(orphans, key=getsize, reverse=True))
480 if ui.debugflag:
480 if ui.debugflag:
481 ui.debug("%s: orphan chain: %s\n" % (filename,
481 ui.debug("%s: orphan chain: %s\n" % (filename,
482 ", ".join([short(s) for s in orphans])))
482 ", ".join([short(s) for s in orphans])))
483
483
484 # Create one contiguous chain and reassign deltabases.
484 # Create one contiguous chain and reassign deltabases.
485 for i, node in enumerate(orphans):
485 for i, node in enumerate(orphans):
486 if i == 0:
486 if i == 0:
487 deltabases[node] = (nullid, 0)
487 deltabases[node] = (nullid, 0)
488 else:
488 else:
489 parent = orphans[i - 1]
489 parent = orphans[i - 1]
490 deltabases[node] = (parent, deltabases[parent][1] + 1)
490 deltabases[node] = (parent, deltabases[parent][1] + 1)
491 nodes = filter(lambda node: node not in orphans, nodes)
491 nodes = filter(lambda node: node not in orphans, nodes)
492 nodes += orphans
492 nodes += orphans
493 return nodes
493 return nodes
494
494
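A toy run of the orphan chaining above: three orphan nodes with sizes 30, 20, and 10 bytes end up as one chain rooted at the largest, each smaller node delta'd against the previous one. nullid here is a stand-in for mercurial.node.nullid.

nullid = b'\0' * 20
a, b, c = b'a' * 20, b'b' * 20, b'c' * 20
sizes = {a: 10, b: 30, c: 20}

orphans = sorted([a, b, c])                                      # stable base order
orphans = sorted(orphans, key=lambda n: sizes[n], reverse=True)  # [b, c, a]

deltabases = {}
for i, node in enumerate(orphans):
    if i == 0:
        deltabases[node] = (nullid, 0)        # biggest orphan becomes the fulltext
    else:
        parent = orphans[i - 1]
        deltabases[node] = (parent, deltabases[parent][1] + 1)
# deltabases == {b: (nullid, 0), c: (b, 1), a: (c, 2)}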
495 def repackdata(self, ledger, target):
495 def repackdata(self, ledger, target):
496 ui = self.repo.ui
496 ui = self.repo.ui
497 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
497 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
498
498
499 byfile = {}
499 byfile = {}
500 for entry in ledger.entries.itervalues():
500 for entry in ledger.entries.itervalues():
501 if entry.datasource:
501 if entry.datasource:
502 byfile.setdefault(entry.filename, {})[entry.node] = entry
502 byfile.setdefault(entry.filename, {})[entry.node] = entry
503
503
504 count = 0
504 count = 0
505 for filename, entries in sorted(byfile.iteritems()):
505 for filename, entries in sorted(byfile.iteritems()):
506 ui.progress(_("repacking data"), count, unit=self.unit,
506 ui.progress(_("repacking data"), count, unit=self.unit,
507 total=len(byfile))
507 total=len(byfile))
508
508
509 ancestors = {}
509 ancestors = {}
510 nodes = list(node for node in entries.iterkeys())
510 nodes = list(node for node in entries.iterkeys())
511 nohistory = []
511 nohistory = []
512 for i, node in enumerate(nodes):
512 for i, node in enumerate(nodes):
513 if node in ancestors:
513 if node in ancestors:
514 continue
514 continue
515 ui.progress(_("building history"), i, unit='nodes',
515 ui.progress(_("building history"), i, unit='nodes',
516 total=len(nodes))
516 total=len(nodes))
517 try:
517 try:
518 ancestors.update(self.fullhistory.getancestors(filename,
518 ancestors.update(self.fullhistory.getancestors(filename,
519 node, known=ancestors))
519 node, known=ancestors))
520 except KeyError:
520 except KeyError:
521 # Since we're packing data entries, we may not have the
521 # Since we're packing data entries, we may not have the
522 # corresponding history entries for them. It's not a big
522 # corresponding history entries for them. It's not a big
523 # deal, but the entries won't be delta'd perfectly.
523 # deal, but the entries won't be delta'd perfectly.
524 nohistory.append(node)
524 nohistory.append(node)
525 ui.progress(_("building history"), None)
525 ui.progress(_("building history"), None)
526
526
527 # Order the nodes children first, so we can produce reverse deltas
527 # Order the nodes children first, so we can produce reverse deltas
528 orderednodes = list(reversed(self._toposort(ancestors)))
528 orderednodes = list(reversed(self._toposort(ancestors)))
529 if len(nohistory) > 0:
529 if len(nohistory) > 0:
530 ui.debug('repackdata: %d nodes without history\n' %
530 ui.debug('repackdata: %d nodes without history\n' %
531 len(nohistory))
531 len(nohistory))
532 orderednodes.extend(sorted(nohistory))
532 orderednodes.extend(sorted(nohistory))
533
533
534 # Filter orderednodes to just the nodes we want to serialize (it
534 # Filter orderednodes to just the nodes we want to serialize (it
535 # currently also has the edge nodes' ancestors).
535 # currently also has the edge nodes' ancestors).
536 orderednodes = filter(lambda node: node in nodes, orderednodes)
536 orderednodes = filter(lambda node: node in nodes, orderednodes)
537
537
538 # Garbage collect old nodes:
538 # Garbage collect old nodes:
539 if self.garbagecollect:
539 if self.garbagecollect:
540 neworderednodes = []
540 neworderednodes = []
541 for node in orderednodes:
541 for node in orderednodes:
542 # If the node is old and is not in the keepset, we skip it,
542 # If the node is old and is not in the keepset, we skip it,
543 # and mark as garbage collected
543 # and mark as garbage collected
544 if ((filename, node) not in self.keepkeys and
544 if ((filename, node) not in self.keepkeys and
545 self.isold(self.repo, filename, node)):
545 self.isold(self.repo, filename, node)):
546 entries[node].gced = True
546 entries[node].gced = True
547 continue
547 continue
548 neworderednodes.append(node)
548 neworderednodes.append(node)
549 orderednodes = neworderednodes
549 orderednodes = neworderednodes
550
550
551 # Compute delta bases for nodes:
551 # Compute delta bases for nodes:
552 deltabases = {}
552 deltabases = {}
553 nobase = set()
553 nobase = set()
554 referenced = set()
554 referenced = set()
555 nodes = set(nodes)
555 nodes = set(nodes)
556 for i, node in enumerate(orderednodes):
556 for i, node in enumerate(orderednodes):
557 ui.progress(_("processing nodes"), i, unit='nodes',
557 ui.progress(_("processing nodes"), i, unit='nodes',
558 total=len(orderednodes))
558 total=len(orderednodes))
559 # Find delta base
559 # Find delta base
560 # TODO: allow delta'ing against most recent descendant instead
560 # TODO: allow delta'ing against most recent descendant instead
561 # of immediate child
561 # of immediate child
562 deltatuple = deltabases.get(node, None)
562 deltatuple = deltabases.get(node, None)
563 if deltatuple is None:
563 if deltatuple is None:
564 deltabase, chainlen = nullid, 0
564 deltabase, chainlen = nullid, 0
565 deltabases[node] = (nullid, 0)
565 deltabases[node] = (nullid, 0)
566 nobase.add(node)
566 nobase.add(node)
567 else:
567 else:
568 deltabase, chainlen = deltatuple
568 deltabase, chainlen = deltatuple
569 referenced.add(deltabase)
569 referenced.add(deltabase)
570
570
571 # Use available ancestor information to inform our delta choices
571 # Use available ancestor information to inform our delta choices
572 ancestorinfo = ancestors.get(node)
572 ancestorinfo = ancestors.get(node)
573 if ancestorinfo:
573 if ancestorinfo:
574 p1, p2, linknode, copyfrom = ancestorinfo
574 p1, p2, linknode, copyfrom = ancestorinfo
575
575
576 # The presence of copyfrom means we're at a point where the
576 # The presence of copyfrom means we're at a point where the
577 # file was copied from elsewhere. So don't attempt to do any
577 # file was copied from elsewhere. So don't attempt to do any
578 # deltas with the other file.
578 # deltas with the other file.
579 if copyfrom:
579 if copyfrom:
580 p1 = nullid
580 p1 = nullid
581
581
582 if chainlen < maxchainlen:
582 if chainlen < maxchainlen:
583 # Record this child as the delta base for its parents.
583 # Record this child as the delta base for its parents.
584 # This may be non-optimal, since the parents may have
584 # This may be non-optimal, since the parents may have
585 # many children, and this will only choose the last one.
585 # many children, and this will only choose the last one.
586 # TODO: record all children and try all deltas to find
586 # TODO: record all children and try all deltas to find
587 # best
587 # best
588 if p1 != nullid:
588 if p1 != nullid:
589 deltabases[p1] = (node, chainlen + 1)
589 deltabases[p1] = (node, chainlen + 1)
590 if p2 != nullid:
590 if p2 != nullid:
591 deltabases[p2] = (node, chainlen + 1)
591 deltabases[p2] = (node, chainlen + 1)
592
592
593 # experimental config: repack.chainorphansbysize
593 # experimental config: repack.chainorphansbysize
594 if ui.configbool('repack', 'chainorphansbysize'):
594 if ui.configbool('repack', 'chainorphansbysize'):
595 orphans = nobase - referenced
595 orphans = nobase - referenced
596 orderednodes = self._chainorphans(ui, filename, orderednodes,
596 orderednodes = self._chainorphans(ui, filename, orderednodes,
597 orphans, deltabases)
597 orphans, deltabases)
598
598
599 # Compute deltas and write to the pack
599 # Compute deltas and write to the pack
600 for i, node in enumerate(orderednodes):
600 for i, node in enumerate(orderednodes):
601 deltabase, chainlen = deltabases[node]
601 deltabase, chainlen = deltabases[node]
602 # Compute delta
602 # Compute delta
603 # TODO: Optimize the deltachain fetching. Since we're
603 # TODO: Optimize the deltachain fetching. Since we're
604 # iterating over the different versions of the file, we may
604 # iterating over the different versions of the file, we may
605 # be fetching the same deltachain over and over again.
605 # be fetching the same deltachain over and over again.
606 meta = None
606 meta = None
607 if deltabase != nullid:
607 if deltabase != nullid:
608 deltaentry = self.data.getdelta(filename, node)
608 deltaentry = self.data.getdelta(filename, node)
609 delta, deltabasename, origdeltabase, meta = deltaentry
609 delta, deltabasename, origdeltabase, meta = deltaentry
610 size = meta.get(constants.METAKEYSIZE)
610 size = meta.get(constants.METAKEYSIZE)
611 if (deltabasename != filename or origdeltabase != deltabase
611 if (deltabasename != filename or origdeltabase != deltabase
612 or size is None):
612 or size is None):
613 deltabasetext = self.data.get(filename, deltabase)
613 deltabasetext = self.data.get(filename, deltabase)
614 original = self.data.get(filename, node)
614 original = self.data.get(filename, node)
615 size = len(original)
615 size = len(original)
616 delta = mdiff.textdiff(deltabasetext, original)
616 delta = mdiff.textdiff(deltabasetext, original)
617 else:
617 else:
618 delta = self.data.get(filename, node)
618 delta = self.data.get(filename, node)
619 size = len(delta)
619 size = len(delta)
620 meta = self.data.getmeta(filename, node)
620 meta = self.data.getmeta(filename, node)
621
621
622 # TODO: don't use the delta if it's larger than the fulltext
622 # TODO: don't use the delta if it's larger than the fulltext
623 if constants.METAKEYSIZE not in meta:
623 if constants.METAKEYSIZE not in meta:
624 meta[constants.METAKEYSIZE] = size
624 meta[constants.METAKEYSIZE] = size
625 target.add(filename, node, deltabase, delta, meta)
625 target.add(filename, node, deltabase, delta, meta)
626
626
627 entries[node].datarepacked = True
627 entries[node].datarepacked = True
628
628
629 ui.progress(_("processing nodes"), None)
629 ui.progress(_("processing nodes"), None)
630 count += 1
630 count += 1
631
631
632 ui.progress(_("repacking data"), None)
632 ui.progress(_("repacking data"), None)
633 target.close(ledger=ledger)
633 target.close(ledger=ledger)
634
634
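The delta-base bookkeeping in repackdata() can be condensed as follows: nodes are visited children first, and each child records itself as the delta base of its parents (the last child to do so wins), bounded by maxchainlen. This is a sketch of the bookkeeping only; the real method also handles copies, garbage collection, and the actual delta computation.

nullid = b'\0' * 20

def assignbases(orderednodes, parents, maxchainlen=1000):
    """parents maps node -> (p1, p2); returns node -> (deltabase, chainlen)."""
    deltabases = {}
    for node in orderednodes:
        if node not in deltabases:
            # no child chose this node as a base: it is stored as a fulltext
            deltabases[node] = (nullid, 0)
        deltabase, chainlen = deltabases[node]
        if chainlen < maxchainlen:
            for p in parents.get(node, ()):
                if p != nullid:
                    # reverse delta: the parent will be delta'd against this child
                    deltabases[p] = (node, chainlen + 1)
    return deltabases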
635 def repackhistory(self, ledger, target):
635 def repackhistory(self, ledger, target):
636 ui = self.repo.ui
636 ui = self.repo.ui
637
637
638 byfile = {}
638 byfile = {}
639 for entry in ledger.entries.itervalues():
639 for entry in ledger.entries.itervalues():
640 if entry.historysource:
640 if entry.historysource:
641 byfile.setdefault(entry.filename, {})[entry.node] = entry
641 byfile.setdefault(entry.filename, {})[entry.node] = entry
642
642
643 count = 0
643 count = 0
644 for filename, entries in sorted(byfile.iteritems()):
644 for filename, entries in sorted(byfile.iteritems()):
645 ancestors = {}
645 ancestors = {}
646 nodes = list(node for node in entries.iterkeys())
646 nodes = list(node for node in entries.iterkeys())
647
647
648 for node in nodes:
648 for node in nodes:
649 if node in ancestors:
649 if node in ancestors:
650 continue
650 continue
651 ancestors.update(self.history.getancestors(filename, node,
651 ancestors.update(self.history.getancestors(filename, node,
652 known=ancestors))
652 known=ancestors))
653
653
654 # Order the nodes children first
654 # Order the nodes children first
655 orderednodes = reversed(self._toposort(ancestors))
655 orderednodes = reversed(self._toposort(ancestors))
656
656
657 # Write to the pack
657 # Write to the pack
658 dontprocess = set()
658 dontprocess = set()
659 for node in orderednodes:
659 for node in orderednodes:
660 p1, p2, linknode, copyfrom = ancestors[node]
660 p1, p2, linknode, copyfrom = ancestors[node]
661
661
662 # If the node is marked dontprocess, but it's also in the
662 # If the node is marked dontprocess, but it's also in the
663 # explicit entries set, that means the node exists both in this
663 # explicit entries set, that means the node exists both in this
664 # file and in another file that was copied to this file.
664 # file and in another file that was copied to this file.
665 # Usually this happens if the file was copied to another file,
665 # Usually this happens if the file was copied to another file,
666 # then the copy was deleted, then reintroduced without copy
666 # then the copy was deleted, then reintroduced without copy
667 # metadata. The original add and the new add have the same hash
667 # metadata. The original add and the new add have the same hash
668 # since the content is identical and the parents are null.
668 # since the content is identical and the parents are null.
669 if node in dontprocess and node not in entries:
669 if node in dontprocess and node not in entries:
670 # If copyfrom == filename, it means the copy history
670 # If copyfrom == filename, it means the copy history
671 # went to some other file, then came back to this one, so we
671 # went to some other file, then came back to this one, so we
672 # should continue processing it.
672 # should continue processing it.
673 if p1 != nullid and copyfrom != filename:
673 if p1 != nullid and copyfrom != filename:
674 dontprocess.add(p1)
674 dontprocess.add(p1)
675 if p2 != nullid:
675 if p2 != nullid:
676 dontprocess.add(p2)
676 dontprocess.add(p2)
677 continue
677 continue
678
678
679 if copyfrom:
679 if copyfrom:
680 dontprocess.add(p1)
680 dontprocess.add(p1)
681
681
682 target.add(filename, node, p1, p2, linknode, copyfrom)
682 target.add(filename, node, p1, p2, linknode, copyfrom)
683
683
684 if node in entries:
684 if node in entries:
685 entries[node].historyrepacked = True
685 entries[node].historyrepacked = True
686
686
687 count += 1
687 count += 1
688 ui.progress(_("repacking history"), count, unit=self.unit,
688 ui.progress(_("repacking history"), count, unit=self.unit,
689 total=len(byfile))
689 total=len(byfile))
690
690
691 ui.progress(_("repacking history"), None)
691 ui.progress(_("repacking history"), None)
692 target.close(ledger=ledger)
692 target.close(ledger=ledger)
693
693
694 def _toposort(self, ancestors):
694 def _toposort(self, ancestors):
695 def parentfunc(node):
695 def parentfunc(node):
696 p1, p2, linknode, copyfrom = ancestors[node]
696 p1, p2, linknode, copyfrom = ancestors[node]
697 parents = []
697 parents = []
698 if p1 != nullid:
698 if p1 != nullid:
699 parents.append(p1)
699 parents.append(p1)
700 if p2 != nullid:
700 if p2 != nullid:
701 parents.append(p2)
701 parents.append(p2)
702 return parents
702 return parents
703
703
704 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
704 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
705 return sortednodes
705 return sortednodes
706
706
707 class repackledger(object):
707 class repackledger(object):
708 """Storage for all the bookkeeping that happens during a repack. It contains
708 """Storage for all the bookkeeping that happens during a repack. It contains
709 the list of revisions being repacked, what happened to each revision, and
709 the list of revisions being repacked, what happened to each revision, and
710 which source store contained which revision originally (for later cleanup).
710 which source store contained which revision originally (for later cleanup).
711 """
711 """
712 def __init__(self):
712 def __init__(self):
713 self.entries = {}
713 self.entries = {}
714 self.sources = {}
714 self.sources = {}
715 self.created = set()
715 self.created = set()
716
716
717 def markdataentry(self, source, filename, node):
717 def markdataentry(self, source, filename, node):
718 """Mark the given filename+node revision as having a data rev in the
718 """Mark the given filename+node revision as having a data rev in the
719 given source.
719 given source.
720 """
720 """
721 entry = self._getorcreateentry(filename, node)
721 entry = self._getorcreateentry(filename, node)
722 entry.datasource = True
722 entry.datasource = True
723 entries = self.sources.get(source)
723 entries = self.sources.get(source)
724 if not entries:
724 if not entries:
725 entries = set()
725 entries = set()
726 self.sources[source] = entries
726 self.sources[source] = entries
727 entries.add(entry)
727 entries.add(entry)
728
728
729 def markhistoryentry(self, source, filename, node):
729 def markhistoryentry(self, source, filename, node):
730 """Mark the given filename+node revision as having a history rev in the
730 """Mark the given filename+node revision as having a history rev in the
731 given source.
731 given source.
732 """
732 """
733 entry = self._getorcreateentry(filename, node)
733 entry = self._getorcreateentry(filename, node)
734 entry.historysource = True
734 entry.historysource = True
735 entries = self.sources.get(source)
735 entries = self.sources.get(source)
736 if not entries:
736 if not entries:
737 entries = set()
737 entries = set()
738 self.sources[source] = entries
738 self.sources[source] = entries
739 entries.add(entry)
739 entries.add(entry)
740
740
741 def _getorcreateentry(self, filename, node):
741 def _getorcreateentry(self, filename, node):
742 key = (filename, node)
742 key = (filename, node)
743 value = self.entries.get(key)
743 value = self.entries.get(key)
744 if not value:
744 if not value:
745 value = repackentry(filename, node)
745 value = repackentry(filename, node)
746 self.entries[key] = value
746 self.entries[key] = value
747
747
748 return value
748 return value
749
749
750 def addcreated(self, value):
750 def addcreated(self, value):
751 self.created.add(value)
751 self.created.add(value)
752
752
753 class repackentry(object):
753 class repackentry(object):
754 """Simple class representing a single revision entry in the repackledger.
754 """Simple class representing a single revision entry in the repackledger.
755 """
755 """
756 __slots__ = ['filename', 'node', 'datasource', 'historysource',
756 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
757 'datarepacked', 'historyrepacked', 'gced']
757 r'datarepacked', r'historyrepacked', r'gced')
758 def __init__(self, filename, node):
758 def __init__(self, filename, node):
759 self.filename = filename
759 self.filename = filename
760 self.node = node
760 self.node = node
761 # If the revision has a data entry in the source
761 # If the revision has a data entry in the source
762 self.datasource = False
762 self.datasource = False
763 # If the revision has a history entry in the source
763 # If the revision has a history entry in the source
764 self.historysource = False
764 self.historysource = False
765 # If the revision's data entry was repacked into the repack target
765 # If the revision's data entry was repacked into the repack target
766 self.datarepacked = False
766 self.datarepacked = False
767 # If the revision's history entry was repacked into the repack target
767 # If the revision's history entry was repacked into the repack target
768 self.historyrepacked = False
768 self.historyrepacked = False
769 # If garbage collected
769 # If garbage collected
770 self.gced = False
770 self.gced = False
771
771
772 def repacklockvfs(repo):
772 def repacklockvfs(repo):
773 if util.safehasattr(repo, 'name'):
773 if util.safehasattr(repo, 'name'):
774 # Lock in the shared cache so repacks across multiple copies of the same
774 # Lock in the shared cache so repacks across multiple copies of the same
775 # repo are coordinated.
775 # repo are coordinated.
776 sharedcachepath = shallowutil.getcachepackpath(
776 sharedcachepath = shallowutil.getcachepackpath(
777 repo,
777 repo,
778 constants.FILEPACK_CATEGORY)
778 constants.FILEPACK_CATEGORY)
779 return vfs.vfs(sharedcachepath)
779 return vfs.vfs(sharedcachepath)
780 else:
780 else:
781 return repo.svfs
781 return repo.svfs
@@ -1,491 +1,491 b''
1 # shallowutil.py -- remotefilelog utilities
1 # shallowutil.py -- remotefilelog utilities
2 #
2 #
3 # Copyright 2014 Facebook, Inc.
3 # Copyright 2014 Facebook, Inc.
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7 from __future__ import absolute_import
7 from __future__ import absolute_import
8
8
9 import collections
9 import collections
10 import errno
10 import errno
11 import hashlib
11 import hashlib
12 import os
12 import os
13 import stat
13 import stat
14 import struct
14 import struct
15 import tempfile
15 import tempfile
16
16
17 from mercurial.i18n import _
17 from mercurial.i18n import _
18 from mercurial import (
18 from mercurial import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 util,
22 util,
23 )
23 )
24 from mercurial.utils import (
24 from mercurial.utils import (
25 storageutil,
25 storageutil,
26 stringutil,
26 stringutil,
27 )
27 )
28 from . import constants
28 from . import constants
29
29
30 if not pycompat.iswindows:
30 if not pycompat.iswindows:
31 import grp
31 import grp
32
32
33 def isenabled(repo):
33 def isenabled(repo):
34 """returns whether the repository is remotefilelog enabled or not"""
34 """returns whether the repository is remotefilelog enabled or not"""
35 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
35 return constants.SHALLOWREPO_REQUIREMENT in repo.requirements
36
36
37 def getcachekey(reponame, file, id):
37 def getcachekey(reponame, file, id):
38 pathhash = hashlib.sha1(file).hexdigest()
38 pathhash = hashlib.sha1(file).hexdigest()
39 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
39 return os.path.join(reponame, pathhash[:2], pathhash[2:], id)
40
40
41 def getlocalkey(file, id):
41 def getlocalkey(file, id):
42 pathhash = hashlib.sha1(file).hexdigest()
42 pathhash = hashlib.sha1(file).hexdigest()
43 return os.path.join(pathhash, id)
43 return os.path.join(pathhash, id)
44
44
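For illustration, the cache-key layout produced by getcachekey() and getlocalkey(): the file path is SHA-1 hashed and split after two hex digits so files fan out across directories. The repo name and file id below are made up.

import hashlib
import os

pathhash = hashlib.sha1(b'foo/bar.txt').hexdigest()
fileid = '0' * 40                                   # hypothetical file node hex
cachekey = os.path.join('myrepo', pathhash[:2], pathhash[2:], fileid)
localkey = os.path.join(pathhash, fileid)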
45 def getcachepath(ui, allowempty=False):
45 def getcachepath(ui, allowempty=False):
46 cachepath = ui.config("remotefilelog", "cachepath")
46 cachepath = ui.config("remotefilelog", "cachepath")
47 if not cachepath:
47 if not cachepath:
48 if allowempty:
48 if allowempty:
49 return None
49 return None
50 else:
50 else:
51 raise error.Abort(_("could not find config option "
51 raise error.Abort(_("could not find config option "
52 "remotefilelog.cachepath"))
52 "remotefilelog.cachepath"))
53 return util.expandpath(cachepath)
53 return util.expandpath(cachepath)
54
54
55 def getcachepackpath(repo, category):
55 def getcachepackpath(repo, category):
56 cachepath = getcachepath(repo.ui)
56 cachepath = getcachepath(repo.ui)
57 if category != constants.FILEPACK_CATEGORY:
57 if category != constants.FILEPACK_CATEGORY:
58 return os.path.join(cachepath, repo.name, 'packs', category)
58 return os.path.join(cachepath, repo.name, 'packs', category)
59 else:
59 else:
60 return os.path.join(cachepath, repo.name, 'packs')
60 return os.path.join(cachepath, repo.name, 'packs')
61
61
62 def getlocalpackpath(base, category):
62 def getlocalpackpath(base, category):
63 return os.path.join(base, 'packs', category)
63 return os.path.join(base, 'packs', category)
64
64
65 def createrevlogtext(text, copyfrom=None, copyrev=None):
65 def createrevlogtext(text, copyfrom=None, copyrev=None):
66 """returns a string that matches the revlog contents in a
66 """returns a string that matches the revlog contents in a
67 traditional revlog
67 traditional revlog
68 """
68 """
69 meta = {}
69 meta = {}
70 if copyfrom or text.startswith('\1\n'):
70 if copyfrom or text.startswith('\1\n'):
71 if copyfrom:
71 if copyfrom:
72 meta['copy'] = copyfrom
72 meta['copy'] = copyfrom
73 meta['copyrev'] = copyrev
73 meta['copyrev'] = copyrev
74 text = storageutil.packmeta(meta, text)
74 text = storageutil.packmeta(meta, text)
75
75
76 return text
76 return text
77
77
78 def parsemeta(text):
78 def parsemeta(text):
79 """parse mercurial filelog metadata"""
79 """parse mercurial filelog metadata"""
80 meta, size = storageutil.parsemeta(text)
80 meta, size = storageutil.parsemeta(text)
81 if text.startswith('\1\n'):
81 if text.startswith('\1\n'):
82 s = text.index('\1\n', 2)
82 s = text.index('\1\n', 2)
83 text = text[s + 2:]
83 text = text[s + 2:]
84 return meta or {}, text
84 return meta or {}, text
85
85
86 def sumdicts(*dicts):
86 def sumdicts(*dicts):
87 """Adds all the values of *dicts together into one dictionary. This assumes
87 """Adds all the values of *dicts together into one dictionary. This assumes
88 the values in *dicts are all summable.
88 the values in *dicts are all summable.
89
89
90 e.g. [{'a': 4', 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
90 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
90 e.g. [{'a': 4, 'b': 2}, {'b': 3, 'c': 1}] -> {'a': 4, 'b': 5, 'c': 1}
91 """
92 result = collections.defaultdict(lambda: 0)
92 result = collections.defaultdict(lambda: 0)
93 for dict in dicts:
93 for dict in dicts:
94 for k, v in dict.iteritems():
94 for k, v in dict.iteritems():
95 result[k] += v
95 result[k] += v
96 return result
96 return result
97
97
98 def prefixkeys(dict, prefix):
98 def prefixkeys(dict, prefix):
99 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
99 """Returns ``dict`` with ``prefix`` prepended to all its keys."""
100 result = {}
100 result = {}
101 for k, v in dict.iteritems():
101 for k, v in dict.iteritems():
102 result[prefix + k] = v
102 result[prefix + k] = v
103 return result
103 return result
104
104
105 def reportpackmetrics(ui, prefix, *stores):
105 def reportpackmetrics(ui, prefix, *stores):
106 dicts = [s.getmetrics() for s in stores]
106 dicts = [s.getmetrics() for s in stores]
107 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
107 dict = prefixkeys(sumdicts(*dicts), prefix + '_')
108 ui.log(prefix + "_packsizes", "", **dict)
108 ui.log(prefix + "_packsizes", "", **dict)
109
109
110 def _parsepackmeta(metabuf):
110 def _parsepackmeta(metabuf):
111 """parse datapack meta, bytes (<metadata-list>) -> dict
111 """parse datapack meta, bytes (<metadata-list>) -> dict
112
112
113 The dict contains raw content - both keys and values are strings.
113 The dict contains raw content - both keys and values are strings.
114 Upper-level business may want to convert some of them to other types like
114 Upper-level business may want to convert some of them to other types like
115 integers, on their own.
115 integers, on their own.
116
116
117 raise ValueError if the data is corrupted
117 raise ValueError if the data is corrupted
118 """
118 """
119 metadict = {}
119 metadict = {}
120 offset = 0
120 offset = 0
121 buflen = len(metabuf)
121 buflen = len(metabuf)
122 while buflen - offset >= 3:
122 while buflen - offset >= 3:
123 key = metabuf[offset]
123 key = metabuf[offset]
124 offset += 1
124 offset += 1
125 metalen = struct.unpack_from('!H', metabuf, offset)[0]
125 metalen = struct.unpack_from('!H', metabuf, offset)[0]
126 offset += 2
126 offset += 2
127 if offset + metalen > buflen:
127 if offset + metalen > buflen:
128 raise ValueError('corrupted metadata: incomplete buffer')
128 raise ValueError('corrupted metadata: incomplete buffer')
129 value = metabuf[offset:offset + metalen]
129 value = metabuf[offset:offset + metalen]
130 metadict[key] = value
130 metadict[key] = value
131 offset += metalen
131 offset += metalen
132 if offset != buflen:
132 if offset != buflen:
133 raise ValueError('corrupted metadata: redundant data')
133 raise ValueError('corrupted metadata: redundant data')
134 return metadict
134 return metadict
135
135
136 def _buildpackmeta(metadict):
136 def _buildpackmeta(metadict):
137 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
137 """reverse of _parsepackmeta, dict -> bytes (<metadata-list>)
138
138
139 The dict contains raw content - both keys and values are strings.
139 The dict contains raw content - both keys and values are strings.
140 Upper-level business may want to serialize some of other types (like
140 Upper-level business may want to serialize some of other types (like
141 integers) to strings before calling this function.
141 integers) to strings before calling this function.
142
142
143 raise ProgrammingError when metadata key is illegal, or ValueError if
143 raise ProgrammingError when metadata key is illegal, or ValueError if
144 length limit is exceeded
144 length limit is exceeded
145 """
145 """
146 metabuf = ''
146 metabuf = ''
147 for k, v in sorted((metadict or {}).iteritems()):
147 for k, v in sorted((metadict or {}).iteritems()):
148 if len(k) != 1:
148 if len(k) != 1:
149 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
149 raise error.ProgrammingError('packmeta: illegal key: %s' % k)
150 if len(v) > 0xfffe:
150 if len(v) > 0xfffe:
151 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
151 raise ValueError('metadata value is too long: 0x%x > 0xfffe'
152 % len(v))
152 % len(v))
153 metabuf += k
153 metabuf += k
154 metabuf += struct.pack('!H', len(v))
154 metabuf += struct.pack('!H', len(v))
155 metabuf += v
155 metabuf += v
156 # len(metabuf) is guaranteed representable in 4 bytes, because there are
156 # len(metabuf) is guaranteed representable in 4 bytes, because there are
157 # only 256 keys, and for each value, len(value) <= 0xfffe.
157 # only 256 keys, and for each value, len(value) <= 0xfffe.
158 return metabuf
158 return metabuf
159
159
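The <metadata-list> format handled by _buildpackmeta() and _parsepackmeta() above is a sequence of (1-byte key, 2-byte big-endian length, value) triples. A standalone round-trip of that encoding, using bytes throughout:

import struct

def packmeta(metadict):
    buf = b''
    for k, v in sorted(metadict.items()):
        buf += k + struct.pack('!H', len(v)) + v
    return buf

def unpackmeta(buf):
    out, offset = {}, 0
    while offset < len(buf):
        key = buf[offset:offset + 1]
        (vlen,) = struct.unpack_from('!H', buf, offset + 1)
        out[key] = buf[offset + 3:offset + 3 + vlen]
        offset += 3 + vlen
    return out

assert unpackmeta(packmeta({b's': b'\x2a', b'f': b''})) == {b's': b'\x2a', b'f': b''}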
160 _metaitemtypes = {
160 _metaitemtypes = {
161 constants.METAKEYFLAG: (int, long),
161 constants.METAKEYFLAG: (int, pycompat.long),
162 constants.METAKEYSIZE: (int, long),
162 constants.METAKEYSIZE: (int, pycompat.long),
163 }
163 }
164
164
165 def buildpackmeta(metadict):
165 def buildpackmeta(metadict):
166 """like _buildpackmeta, but typechecks metadict and normalizes it.
166 """like _buildpackmeta, but typechecks metadict and normalizes it.
167
167
168 This means METAKEYSIZE and METAKEYFLAG should have integers as values,
168 This means METAKEYSIZE and METAKEYFLAG should have integers as values,
169 and METAKEYFLAG will be dropped if its value is 0.
169 and METAKEYFLAG will be dropped if its value is 0.
170 """
170 """
171 newmeta = {}
171 newmeta = {}
172 for k, v in (metadict or {}).iteritems():
172 for k, v in (metadict or {}).iteritems():
173 expectedtype = _metaitemtypes.get(k, (bytes,))
173 expectedtype = _metaitemtypes.get(k, (bytes,))
174 if not isinstance(v, expectedtype):
174 if not isinstance(v, expectedtype):
175 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
175 raise error.ProgrammingError('packmeta: wrong type of key %s' % k)
176 # normalize int to binary buffer
176 # normalize int to binary buffer
177 if int in expectedtype:
177 if int in expectedtype:
178 # optimization: remove flag if it's 0 to save space
178 # optimization: remove flag if it's 0 to save space
179 if k == constants.METAKEYFLAG and v == 0:
179 if k == constants.METAKEYFLAG and v == 0:
180 continue
180 continue
181 v = int2bin(v)
181 v = int2bin(v)
182 newmeta[k] = v
182 newmeta[k] = v
183 return _buildpackmeta(newmeta)
183 return _buildpackmeta(newmeta)
184
184
185 def parsepackmeta(metabuf):
185 def parsepackmeta(metabuf):
186 """like _parsepackmeta, but convert fields to desired types automatically.
186 """like _parsepackmeta, but convert fields to desired types automatically.
187
187
188 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
188 This means, METAKEYFLAG and METAKEYSIZE fields will be converted to
189 integers.
189 integers.
190 """
190 """
191 metadict = _parsepackmeta(metabuf)
191 metadict = _parsepackmeta(metabuf)
192 for k, v in metadict.iteritems():
192 for k, v in metadict.iteritems():
193 if k in _metaitemtypes and int in _metaitemtypes[k]:
193 if k in _metaitemtypes and int in _metaitemtypes[k]:
194 metadict[k] = bin2int(v)
194 metadict[k] = bin2int(v)
195 return metadict
195 return metadict
196
196
197 def int2bin(n):
197 def int2bin(n):
198 """convert a non-negative integer to raw binary buffer"""
198 """convert a non-negative integer to raw binary buffer"""
199 buf = bytearray()
199 buf = bytearray()
200 while n > 0:
200 while n > 0:
201 buf.insert(0, n & 0xff)
201 buf.insert(0, n & 0xff)
202 n >>= 8
202 n >>= 8
203 return bytes(buf)
203 return bytes(buf)
204
204
205 def bin2int(buf):
205 def bin2int(buf):
206 """the reverse of int2bin, convert a binary buffer to an integer"""
206 """the reverse of int2bin, convert a binary buffer to an integer"""
207 x = 0
207 x = 0
208 for b in bytearray(buf):
208 for b in bytearray(buf):
209 x <<= 8
209 x <<= 8
210 x |= b
210 x |= b
211 return x
211 return x
212
212
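A quick round-trip of the minimal-length, big-endian integer encoding implemented by int2bin()/bin2int() above (reproduced standalone so the assertions can run on their own):

def int2bin(n):
    buf = bytearray()
    while n > 0:
        buf.insert(0, n & 0xff)
        n >>= 8
    return bytes(buf)

def bin2int(buf):
    x = 0
    for b in bytearray(buf):
        x <<= 8
        x |= b
    return x

assert int2bin(0x1234) == b'\x12\x34'
assert bin2int(int2bin(123456789)) == 123456789
assert int2bin(0) == b''           # zero encodes as the empty string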
213 def parsesizeflags(raw):
213 def parsesizeflags(raw):
214 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
214 """given a remotefilelog blob, return (headersize, rawtextsize, flags)
215
215
216 see remotefilelogserver.createfileblob for the format.
216 see remotefilelogserver.createfileblob for the format.
217 raise RuntimeError if the content is ill-formed.
217 raise RuntimeError if the content is ill-formed.
218 """
218 """
219 flags = revlog.REVIDX_DEFAULT_FLAGS
219 flags = revlog.REVIDX_DEFAULT_FLAGS
220 size = None
220 size = None
221 try:
221 try:
222 index = raw.index('\0')
222 index = raw.index('\0')
223 header = raw[:index]
223 header = raw[:index]
224 if header.startswith('v'):
224 if header.startswith('v'):
225 # v1 and above, header starts with 'v'
225 # v1 and above, header starts with 'v'
226 if header.startswith('v1\n'):
226 if header.startswith('v1\n'):
227 for s in header.split('\n'):
227 for s in header.split('\n'):
228 if s.startswith(constants.METAKEYSIZE):
228 if s.startswith(constants.METAKEYSIZE):
229 size = int(s[len(constants.METAKEYSIZE):])
229 size = int(s[len(constants.METAKEYSIZE):])
230 elif s.startswith(constants.METAKEYFLAG):
230 elif s.startswith(constants.METAKEYFLAG):
231 flags = int(s[len(constants.METAKEYFLAG):])
231 flags = int(s[len(constants.METAKEYFLAG):])
232 else:
232 else:
233 raise RuntimeError('unsupported remotefilelog header: %s'
233 raise RuntimeError('unsupported remotefilelog header: %s'
234 % header)
234 % header)
235 else:
235 else:
236 # v0, str(int(size)) is the header
236 # v0, str(int(size)) is the header
237 size = int(header)
237 size = int(header)
238 except ValueError:
238 except ValueError:
239 raise RuntimeError("unexpected remotefilelog header: illegal format")
239 raise RuntimeError("unexpected remotefilelog header: illegal format")
240 if size is None:
240 if size is None:
241 raise RuntimeError("unexpected remotefilelog header: no size found")
241 raise RuntimeError("unexpected remotefilelog header: no size found")
242 return index + 1, size, flags
242 return index + 1, size, flags
243
243
244 def buildfileblobheader(size, flags, version=None):
244 def buildfileblobheader(size, flags, version=None):
245 """return the header of a remotefilelog blob.
245 """return the header of a remotefilelog blob.
246
246
247 see remotefilelogserver.createfileblob for the format.
247 see remotefilelogserver.createfileblob for the format.
248 approximately the reverse of parsesizeflags.
248 approximately the reverse of parsesizeflags.
249
249
250 version could be 0 or 1, or None (auto decide).
250 version could be 0 or 1, or None (auto decide).
251 """
251 """
252 # choose v0 if flags is empty, otherwise v1
252 # choose v0 if flags is empty, otherwise v1
253 if version is None:
253 if version is None:
254 version = int(bool(flags))
254 version = int(bool(flags))
255 if version == 1:
255 if version == 1:
256 header = ('v1\n%s%d\n%s%d'
256 header = ('v1\n%s%d\n%s%d'
257 % (constants.METAKEYSIZE, size,
257 % (constants.METAKEYSIZE, size,
258 constants.METAKEYFLAG, flags))
258 constants.METAKEYFLAG, flags))
259 elif version == 0:
259 elif version == 0:
260 if flags:
260 if flags:
261 raise error.ProgrammingError('fileblob v0 does not support flag')
261 raise error.ProgrammingError('fileblob v0 does not support flag')
262 header = '%d' % size
262 header = '%d' % size
263 else:
263 else:
264 raise error.ProgrammingError('unknown fileblob version %d' % version)
264 raise error.ProgrammingError('unknown fileblob version %d' % version)
265 return header
265 return header
266
266
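An example of the two header flavours produced by buildfileblobheader() and read back by parsesizeflags(). The toy parser below assumes METAKEYSIZE and METAKEYFLAG are the single-byte keys 's' and 'f'; that assumption stands in for the constants module, which is not shown here.

def parseheader(raw):
    """Toy parsesizeflags(): returns (headersize, size, flags)."""
    index = raw.index('\0')
    header = raw[:index]
    size, flags = None, 0
    if header.startswith('v1\n'):
        for s in header.split('\n'):
            if s.startswith('s'):
                size = int(s[1:])
            elif s.startswith('f'):
                flags = int(s[1:])
    elif not header.startswith('v'):
        size = int(header)             # v0: the header is just the decimal size
    return index + 1, size, flags

assert parseheader('1234\0<text>') == (5, 1234, 0)            # v0
assert parseheader('v1\ns1234\nf2\0<text>') == (12, 1234, 2)  # v1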
267 def ancestormap(raw):
267 def ancestormap(raw):
268 offset, size, flags = parsesizeflags(raw)
268 offset, size, flags = parsesizeflags(raw)
269 start = offset + size
269 start = offset + size
270
270
271 mapping = {}
271 mapping = {}
272 while start < len(raw):
272 while start < len(raw):
273 divider = raw.index('\0', start + 80)
273 divider = raw.index('\0', start + 80)
274
274
275 currentnode = raw[start:(start + 20)]
275 currentnode = raw[start:(start + 20)]
276 p1 = raw[(start + 20):(start + 40)]
276 p1 = raw[(start + 20):(start + 40)]
277 p2 = raw[(start + 40):(start + 60)]
277 p2 = raw[(start + 40):(start + 60)]
278 linknode = raw[(start + 60):(start + 80)]
278 linknode = raw[(start + 60):(start + 80)]
279 copyfrom = raw[(start + 80):divider]
279 copyfrom = raw[(start + 80):divider]
280
280
281 mapping[currentnode] = (p1, p2, linknode, copyfrom)
281 mapping[currentnode] = (p1, p2, linknode, copyfrom)
282 start = divider + 1
282 start = divider + 1
283
283
284 return mapping
284 return mapping
285
285
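The blob tail that ancestormap() walks is a concatenation of fixed-width records: four 20-byte binary hashes (node, p1, p2, linknode) followed by a NUL-terminated copyfrom path. One record, built by hand for illustration:

node = b'\x11' * 20
p1, p2 = b'\x22' * 20, b'\x00' * 20          # p2 is null here
linknode = b'\x33' * 20
copyfrom = b'old/name.txt'                   # empty when the file was not copied
record = node + p1 + p2 + linknode + copyfrom + b'\0'
assert len(record) == 80 + len(copyfrom) + 1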
286 def readfile(path):
286 def readfile(path):
287 f = open(path, 'rb')
287 f = open(path, 'rb')
288 try:
288 try:
289 result = f.read()
289 result = f.read()
290
290
291 # we should never have empty files
291 # we should never have empty files
292 if not result:
292 if not result:
293 os.remove(path)
293 os.remove(path)
294 raise IOError("empty file: %s" % path)
294 raise IOError("empty file: %s" % path)
295
295
296 return result
296 return result
297 finally:
297 finally:
298 f.close()
298 f.close()
299
299
300 def unlinkfile(filepath):
300 def unlinkfile(filepath):
301 if pycompat.iswindows:
301 if pycompat.iswindows:
302 # On Windows, os.unlink cannot delete readonly files
302 # On Windows, os.unlink cannot delete readonly files
303 os.chmod(filepath, stat.S_IWUSR)
303 os.chmod(filepath, stat.S_IWUSR)
304 os.unlink(filepath)
304 os.unlink(filepath)
305
305
306 def renamefile(source, destination):
306 def renamefile(source, destination):
307 if pycompat.iswindows:
307 if pycompat.iswindows:
308 # On Windows, os.rename cannot rename readonly files
308 # On Windows, os.rename cannot rename readonly files
309 # and cannot overwrite destination if it exists
309 # and cannot overwrite destination if it exists
310 os.chmod(source, stat.S_IWUSR)
310 os.chmod(source, stat.S_IWUSR)
311 if os.path.isfile(destination):
311 if os.path.isfile(destination):
312 os.chmod(destination, stat.S_IWUSR)
312 os.chmod(destination, stat.S_IWUSR)
313 os.unlink(destination)
313 os.unlink(destination)
314
314
315 os.rename(source, destination)
315 os.rename(source, destination)
316
316
317 def writefile(path, content, readonly=False):
317 def writefile(path, content, readonly=False):
318 dirname, filename = os.path.split(path)
318 dirname, filename = os.path.split(path)
319 if not os.path.exists(dirname):
319 if not os.path.exists(dirname):
320 try:
320 try:
321 os.makedirs(dirname)
321 os.makedirs(dirname)
322 except OSError as ex:
322 except OSError as ex:
323 if ex.errno != errno.EEXIST:
323 if ex.errno != errno.EEXIST:
324 raise
324 raise
325
325
326 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
326 fd, temp = tempfile.mkstemp(prefix='.%s-' % filename, dir=dirname)
327 os.close(fd)
327 os.close(fd)
328
328
329 try:
329 try:
330 f = util.posixfile(temp, 'wb')
330 f = util.posixfile(temp, 'wb')
331 f.write(content)
331 f.write(content)
332 f.close()
332 f.close()
333
333
334 if readonly:
334 if readonly:
335 mode = 0o444
335 mode = 0o444
336 else:
336 else:
337 # tempfiles are created with 0o600, so we need to manually set the
337 # tempfiles are created with 0o600, so we need to manually set the
338 # mode.
338 # mode.
339 oldumask = os.umask(0)
339 oldumask = os.umask(0)
340 # there's no way to get the umask without modifying it, so set it
340 # there's no way to get the umask without modifying it, so set it
341 # back
341 # back
342 os.umask(oldumask)
342 os.umask(oldumask)
343 mode = ~oldumask
343 mode = ~oldumask
344
344
345 renamefile(temp, path)
345 renamefile(temp, path)
346 os.chmod(path, mode)
346 os.chmod(path, mode)
347 except Exception:
347 except Exception:
348 try:
348 try:
349 unlinkfile(temp)
349 unlinkfile(temp)
350 except OSError:
350 except OSError:
351 pass
351 pass
352 raise
352 raise
353
353
354 def sortnodes(nodes, parentfunc):
354 def sortnodes(nodes, parentfunc):
355 """Topologically sorts the nodes, using the parentfunc to find
355 """Topologically sorts the nodes, using the parentfunc to find
356 the parents of nodes."""
356 the parents of nodes."""
357 nodes = set(nodes)
357 nodes = set(nodes)
358 childmap = {}
358 childmap = {}
359 parentmap = {}
359 parentmap = {}
360 roots = []
360 roots = []
361
361
362 # Build a child and parent map
362 # Build a child and parent map
363 for n in nodes:
363 for n in nodes:
364 parents = [p for p in parentfunc(n) if p in nodes]
364 parents = [p for p in parentfunc(n) if p in nodes]
365 parentmap[n] = set(parents)
365 parentmap[n] = set(parents)
366 for p in parents:
366 for p in parents:
367 childmap.setdefault(p, set()).add(n)
367 childmap.setdefault(p, set()).add(n)
368 if not parents:
368 if not parents:
369 roots.append(n)
369 roots.append(n)
370
370
371 roots.sort()
371 roots.sort()
372 # Process roots, adding children to the queue as they become roots
372 # Process roots, adding children to the queue as they become roots
373 results = []
373 results = []
374 while roots:
374 while roots:
375 n = roots.pop(0)
375 n = roots.pop(0)
376 results.append(n)
376 results.append(n)
377 if n in childmap:
377 if n in childmap:
378 children = childmap[n]
378 children = childmap[n]
379 for c in children:
379 for c in children:
380 childparents = parentmap[c]
380 childparents = parentmap[c]
381 childparents.remove(n)
381 childparents.remove(n)
382 if len(childparents) == 0:
382 if len(childparents) == 0:
383 # insert at the beginning, that way child nodes
383 # insert at the beginning, that way child nodes
384 # are likely to be output immediately after their
384 # are likely to be output immediately after their
385 # parents. This gives better compression results.
385 # parents. This gives better compression results.
386 roots.insert(0, c)
386 roots.insert(0, c)
387
387
388 return results
388 return results
389
389
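A hypothetical call to sortnodes() above on a four-node DAG; parents come out before their children, and a child tends to directly follow its parent, which is what gives the better compression mentioned in the comment.

#   a --- b --- c
#          \
#           d
parents = {'a': [], 'b': ['a'], 'c': ['b'], 'd': ['b']}
ordered = sortnodes(parents, lambda n: parents[n])
# e.g. ['a', 'b', 'c', 'd'] or ['a', 'b', 'd', 'c'], depending on set order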
390 def readexactly(stream, n):
390 def readexactly(stream, n):
391 '''read n bytes from stream.read and abort if less was available'''
391 '''read n bytes from stream.read and abort if less was available'''
392 s = stream.read(n)
392 s = stream.read(n)
393 if len(s) < n:
393 if len(s) < n:
394 raise error.Abort(_("stream ended unexpectedly"
394 raise error.Abort(_("stream ended unexpectedly"
395 " (got %d bytes, expected %d)")
395 " (got %d bytes, expected %d)")
396 % (len(s), n))
396 % (len(s), n))
397 return s
397 return s
398
398
399 def readunpack(stream, fmt):
399 def readunpack(stream, fmt):
400 data = readexactly(stream, struct.calcsize(fmt))
400 data = readexactly(stream, struct.calcsize(fmt))
401 return struct.unpack(fmt, data)
401 return struct.unpack(fmt, data)
402
402
403 def readpath(stream):
403 def readpath(stream):
404 rawlen = readexactly(stream, constants.FILENAMESIZE)
404 rawlen = readexactly(stream, constants.FILENAMESIZE)
405 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
405 pathlen = struct.unpack(constants.FILENAMESTRUCT, rawlen)[0]
406 return readexactly(stream, pathlen)
406 return readexactly(stream, pathlen)
407
407
408 def readnodelist(stream):
408 def readnodelist(stream):
409 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
409 rawlen = readexactly(stream, constants.NODECOUNTSIZE)
410 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
410 nodecount = struct.unpack(constants.NODECOUNTSTRUCT, rawlen)[0]
411 for i in pycompat.xrange(nodecount):
411 for i in pycompat.xrange(nodecount):
412 yield readexactly(stream, constants.NODESIZE)
412 yield readexactly(stream, constants.NODESIZE)
413
413
414 def readpathlist(stream):
414 def readpathlist(stream):
415 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
415 rawlen = readexactly(stream, constants.PATHCOUNTSIZE)
416 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
416 pathcount = struct.unpack(constants.PATHCOUNTSTRUCT, rawlen)[0]
417 for i in pycompat.xrange(pathcount):
417 for i in pycompat.xrange(pathcount):
418 yield readpath(stream)
418 yield readpath(stream)
419
419
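The read* helpers above all consume simple length-prefixed frames. A sketch of reading one path from an in-memory stream; the '!H' length format used here is an assumption standing in for constants.FILENAMESTRUCT.

import io
import struct

path = b'foo/bar.txt'
stream = io.BytesIO(struct.pack('!H', len(path)) + path)

rawlen = stream.read(2)                       # readexactly(stream, FILENAMESIZE)
(pathlen,) = struct.unpack('!H', rawlen)
assert stream.read(pathlen) == path           # what readpath() would return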
420 def getgid(groupname):
420 def getgid(groupname):
421 try:
421 try:
422 gid = grp.getgrnam(groupname).gr_gid
422 gid = grp.getgrnam(groupname).gr_gid
423 return gid
423 return gid
424 except KeyError:
424 except KeyError:
425 return None
425 return None
426
426
427 def setstickygroupdir(path, gid, warn=None):
427 def setstickygroupdir(path, gid, warn=None):
428 if gid is None:
428 if gid is None:
429 return
429 return
430 try:
430 try:
431 os.chown(path, -1, gid)
431 os.chown(path, -1, gid)
432 os.chmod(path, 0o2775)
432 os.chmod(path, 0o2775)
433 except (IOError, OSError) as ex:
433 except (IOError, OSError) as ex:
434 if warn:
434 if warn:
435 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
435 warn(_('unable to chown/chmod on %s: %s\n') % (path, ex))
436
436
437 def mkstickygroupdir(ui, path):
437 def mkstickygroupdir(ui, path):
438 """Creates the given directory (if it doesn't exist) and gives it a
438 """Creates the given directory (if it doesn't exist) and gives it a
439 particular group with setgid enabled."""
439 particular group with setgid enabled."""
440 gid = None
440 gid = None
441 groupname = ui.config("remotefilelog", "cachegroup")
441 groupname = ui.config("remotefilelog", "cachegroup")
442 if groupname:
442 if groupname:
443 gid = getgid(groupname)
443 gid = getgid(groupname)
444 if gid is None:
444 if gid is None:
445 ui.warn(_('unable to resolve group name: %s\n') % groupname)
445 ui.warn(_('unable to resolve group name: %s\n') % groupname)
446
446
447 # we use a single stat syscall to test the existence and mode / group bit
447 # we use a single stat syscall to test the existence and mode / group bit
448 st = None
448 st = None
449 try:
449 try:
450 st = os.stat(path)
450 st = os.stat(path)
451 except OSError:
451 except OSError:
452 pass
452 pass
453
453
454 if st:
454 if st:
455 # exists
455 # exists
456 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
456 if (st.st_mode & 0o2775) != 0o2775 or st.st_gid != gid:
457 # permission needs to be fixed
457 # permission needs to be fixed
458 setstickygroupdir(path, gid, ui.warn)
458 setstickygroupdir(path, gid, ui.warn)
459 return
459 return
460
460
461 oldumask = os.umask(0o002)
461 oldumask = os.umask(0o002)
462 try:
462 try:
463 missingdirs = [path]
463 missingdirs = [path]
464 path = os.path.dirname(path)
464 path = os.path.dirname(path)
465 while path and not os.path.exists(path):
465 while path and not os.path.exists(path):
466 missingdirs.append(path)
466 missingdirs.append(path)
467 path = os.path.dirname(path)
467 path = os.path.dirname(path)
468
468
469 for path in reversed(missingdirs):
469 for path in reversed(missingdirs):
470 try:
470 try:
471 os.mkdir(path)
471 os.mkdir(path)
472 except OSError as ex:
472 except OSError as ex:
473 if ex.errno != errno.EEXIST:
473 if ex.errno != errno.EEXIST:
474 raise
474 raise
475
475
476 for path in missingdirs:
476 for path in missingdirs:
477 setstickygroupdir(path, gid, ui.warn)
477 setstickygroupdir(path, gid, ui.warn)
478 finally:
478 finally:
479 os.umask(oldumask)
479 os.umask(oldumask)
480
480
481 def getusername(ui):
481 def getusername(ui):
482 try:
482 try:
483 return stringutil.shortuser(ui.username())
483 return stringutil.shortuser(ui.username())
484 except Exception:
484 except Exception:
485 return 'unknown'
485 return 'unknown'
486
486
487 def getreponame(ui):
487 def getreponame(ui):
488 reponame = ui.config('paths', 'default')
488 reponame = ui.config('paths', 'default')
489 if reponame:
489 if reponame:
490 return os.path.basename(reponame)
490 return os.path.basename(reponame)
491 return "unknown"
491 return "unknown"