manifest: use `read_any_fast_delta` during remotefilelog's repack...
Author: marmoute
Changeset: r52671:7e5ea2a0 (branch: default)
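
The hunk below changes keepset(): instead of calling manifestctx().readdelta() when a revision's parent was already processed, or reading and diffing full manifests otherwise, it asks the manifestctx for read_any_fast_delta(processed) and only falls back to a manual diff when no cheap delta is available. Judging from the new code, the call returns a (delta_from, m) pair, where delta_from names the base the delta was built against (one of the nodes passed in) or None, in which case m is a full manifest. Below is a minimal sketch of consuming such a pair, mirroring the new loop; the helper name, and any return-value semantics beyond what the hunk itself shows, are assumptions for illustration only:

def _file_nodes_for(ctx, processed, lastmanifest=None):
    # Sketch only: mirrors the new keepset() loop in the diff below.
    # delta_from is assumed to be None when no fast delta was available,
    # in which case m is a full manifest rather than a delta/diff dict.
    delta_from, m = ctx.manifestctx().read_any_fast_delta(processed)
    if delta_from is None and lastmanifest is not None:
        # no cheap delta: diff against the previously seen full manifest
        m = m.diff(lastmanifest)
    if type(m) is dict:
        # manifest diff: filename -> ((newnode, newflag), (oldnode, oldflag))
        return [(f, d[0][0]) for f, d in m.items() if d[0][0] is not None]
    # full manifest: filename -> filenode
    return list(m.items())
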
@@ -1,906 +1,902 @@
import os
import time

from mercurial.i18n import _
from mercurial.node import short
from mercurial import (
    encoding,
    error,
    lock as lockmod,
    mdiff,
    policy,
    scmutil,
    util,
    vfs,
)
from mercurial.utils import procutil
from . import (
    constants,
    contentstore,
    datapack,
    historypack,
    metadatastore,
    shallowutil,
)

osutil = policy.importmod('osutil')


class RepackAlreadyRunning(error.Abort):
    pass


def backgroundrepack(repo, incremental=True, packsonly=False):
    cmd = [procutil.hgexecutable(), b'-R', repo.origroot, b'repack']
    msg = _(b"(running background repack)\n")
    if incremental:
        cmd.append(b'--incremental')
        msg = _(b"(running background incremental repack)\n")
    if packsonly:
        cmd.append(b'--packsonly')
    repo.ui.warn(msg)
    # We know this command will find a binary, so don't block on it starting.
    kwargs = {}
    if repo.ui.configbool(b'devel', b'remotefilelog.bg-wait'):
        kwargs['record_wait'] = repo.ui.atexit

    procutil.runbgcommand(cmd, encoding.environ, ensurestart=False, **kwargs)


def fullrepack(repo, options=None):
    """If ``packsonly`` is True, stores creating only loose objects are skipped."""
    if hasattr(repo, 'shareddatastores'):
        datasource = contentstore.unioncontentstore(*repo.shareddatastores)
        historysource = metadatastore.unionmetadatastore(
            *repo.sharedhistorystores, allowincomplete=True
        )

        packpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            packpath,
            constants.FILEPACK_CATEGORY,
            options=options,
        )

    if hasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        datasource = contentstore.unioncontentstore(*sdstores)
        historysource = metadatastore.unionmetadatastore(
            *shstores, allowincomplete=True
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            spackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )

        # Repack the local manifest store
        datasource = contentstore.unioncontentstore(
            *ldstores, allowincomplete=True
        )
        historysource = metadatastore.unionmetadatastore(
            *lhstores, allowincomplete=True
        )
        _runrepack(
            repo,
            datasource,
            historysource,
            lpackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )


def incrementalrepack(repo, options=None):
    """This repacks the repo by looking at the distribution of pack files in the
    repo and performing the most minimal repack to keep the repo in good shape.
    """
    if hasattr(repo, 'shareddatastores'):
        packpath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        _incrementalrepack(
            repo,
            repo.shareddatastores,
            repo.sharedhistorystores,
            packpath,
            constants.FILEPACK_CATEGORY,
            options=options,
        )

    if hasattr(repo.manifestlog, 'datastore'):
        localdata, shareddata = _getmanifeststores(repo)
        lpackpath, ldstores, lhstores = localdata
        spackpath, sdstores, shstores = shareddata

        # Repack the shared manifest store
        _incrementalrepack(
            repo,
            sdstores,
            shstores,
            spackpath,
            constants.TREEPACK_CATEGORY,
            options=options,
        )

        # Repack the local manifest store
        _incrementalrepack(
            repo,
            ldstores,
            lhstores,
            lpackpath,
            constants.TREEPACK_CATEGORY,
            allowincompletedata=True,
            options=options,
        )


def _getmanifeststores(repo):
    shareddatastores = repo.manifestlog.shareddatastores
    localdatastores = repo.manifestlog.localdatastores
    sharedhistorystores = repo.manifestlog.sharedhistorystores
    localhistorystores = repo.manifestlog.localhistorystores

    sharedpackpath = shallowutil.getcachepackpath(
        repo, constants.TREEPACK_CATEGORY
    )
    localpackpath = shallowutil.getlocalpackpath(
        repo.svfs.vfs.base, constants.TREEPACK_CATEGORY
    )

    return (
        (localpackpath, localdatastores, localhistorystores),
        (sharedpackpath, shareddatastores, sharedhistorystores),
    )


def _topacks(packpath, files, constructor):
    paths = list(os.path.join(packpath, p) for p in files)
    packs = list(constructor(p) for p in paths)
    return packs


def _deletebigpacks(repo, folder, files):
    """Deletes packfiles that are bigger than ``packs.maxpacksize``.

    Returns ``files` with the removed files omitted."""
    maxsize = repo.ui.configbytes(b"packs", b"maxpacksize")
    if maxsize <= 0:
        return files

    # This only considers datapacks today, but we could broaden it to include
    # historypacks.
    VALIDEXTS = [b".datapack", b".dataidx"]

    # Either an oversize index or datapack will trigger cleanup of the whole
    # pack:
    oversized = {
        os.path.splitext(path)[0]
        for path, ftype, stat in files
        if (stat.st_size > maxsize and (os.path.splitext(path)[1] in VALIDEXTS))
    }

    for rootfname in oversized:
        rootpath = os.path.join(folder, rootfname)
        for ext in VALIDEXTS:
            path = rootpath + ext
            repo.ui.debug(
                b'removing oversize packfile %s (%s)\n'
                % (path, util.bytecount(os.stat(path).st_size))
            )
            os.unlink(path)
    return [row for row in files if os.path.basename(row[0]) not in oversized]


def _incrementalrepack(
    repo,
    datastore,
    historystore,
    packpath,
    category,
    allowincompletedata=False,
    options=None,
):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    files = osutil.listdir(packpath, stat=True)
    files = _deletebigpacks(repo, packpath, files)
    datapacks = _topacks(
        packpath, _computeincrementaldatapack(repo.ui, files), datapack.datapack
    )
    datapacks.extend(
        s for s in datastore if not isinstance(s, datapack.datapackstore)
    )

    historypacks = _topacks(
        packpath,
        _computeincrementalhistorypack(repo.ui, files),
        historypack.historypack,
    )
    historypacks.extend(
        s
        for s in historystore
        if not isinstance(s, historypack.historypackstore)
    )

    # ``allhistory{files,packs}`` contains all known history packs, even ones we
    # don't plan to repack. They are used during the datapack repack to ensure
    # good ordering of nodes.
    allhistoryfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    allhistorypacks = _topacks(
        packpath,
        (f for f, mode, stat in allhistoryfiles),
        historypack.historypack,
    )
    allhistorypacks.extend(
        s
        for s in historystore
        if not isinstance(s, historypack.historypackstore)
    )
    _runrepack(
        repo,
        contentstore.unioncontentstore(
            *datapacks, allowincomplete=allowincompletedata
        ),
        metadatastore.unionmetadatastore(*historypacks, allowincomplete=True),
        packpath,
        category,
        fullhistory=metadatastore.unionmetadatastore(
            *allhistorypacks, allowincomplete=True
        ),
        options=options,
    )


def _computeincrementaldatapack(ui, files):
    opts = {
        b'gencountlimit': ui.configint(b'remotefilelog', b'data.gencountlimit'),
        b'generations': ui.configlist(b'remotefilelog', b'data.generations'),
        b'maxrepackpacks': ui.configint(
            b'remotefilelog', b'data.maxrepackpacks'
        ),
        b'repackmaxpacksize': ui.configbytes(
            b'remotefilelog', b'data.repackmaxpacksize'
        ),
        b'repacksizelimit': ui.configbytes(
            b'remotefilelog', b'data.repacksizelimit'
        ),
    }

    packfiles = _allpackfileswithsuffix(
        files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX
    )
    return _computeincrementalpack(packfiles, opts)


def _computeincrementalhistorypack(ui, files):
    opts = {
        b'gencountlimit': ui.configint(
            b'remotefilelog', b'history.gencountlimit'
        ),
        b'generations': ui.configlist(
            b'remotefilelog', b'history.generations', [b'100MB']
        ),
        b'maxrepackpacks': ui.configint(
            b'remotefilelog', b'history.maxrepackpacks'
        ),
        b'repackmaxpacksize': ui.configbytes(
            b'remotefilelog', b'history.repackmaxpacksize', b'400MB'
        ),
        b'repacksizelimit': ui.configbytes(
            b'remotefilelog', b'history.repacksizelimit'
        ),
    }

    packfiles = _allpackfileswithsuffix(
        files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX
    )
    return _computeincrementalpack(packfiles, opts)


def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
    result = []
    fileset = {fn for fn, mode, stat in files}
    for filename, mode, stat in files:
        if not filename.endswith(packsuffix):
            continue

        prefix = filename[: -len(packsuffix)]

        # Don't process a pack if it doesn't have an index.
        if (prefix + indexsuffix) not in fileset:
            continue
        result.append((prefix, mode, stat))

    return result


def _computeincrementalpack(files, opts):
    """Given a set of pack files along with the configuration options, this
    function computes the list of files that should be packed as part of an
    incremental repack.

    It tries to strike a balance between keeping incremental repacks cheap (i.e.
    packing small things when possible, and rolling the packs up to the big ones
    over time).
    """

    limits = list(
        sorted((util.sizetoint(s) for s in opts[b'generations']), reverse=True)
    )
    limits.append(0)

    # Group the packs by generation (i.e. by size)
    generations = []
    for i in range(len(limits)):
        generations.append([])

    sizes = {}
    for prefix, mode, stat in files:
        size = stat.st_size
        if size > opts[b'repackmaxpacksize']:
            continue

        sizes[prefix] = size
        for i, limit in enumerate(limits):
            if size > limit:
                generations[i].append(prefix)
                break

    # Steps for picking what packs to repack:
    # 1. Pick the largest generation with > gencountlimit pack files.
    # 2. Take the smallest three packs.
    # 3. While total-size-of-packs < repacksizelimit: add another pack

    # Find the largest generation with more than gencountlimit packs
    genpacks = []
    for i, limit in enumerate(limits):
        if len(generations[i]) > opts[b'gencountlimit']:
            # Sort to be smallest last, for easy popping later
            genpacks.extend(
                sorted(generations[i], reverse=True, key=lambda x: sizes[x])
            )
            break

    # Take as many packs from the generation as we can
    chosenpacks = genpacks[-3:]
    genpacks = genpacks[:-3]
    repacksize = sum(sizes[n] for n in chosenpacks)
    while (
        repacksize < opts[b'repacksizelimit']
        and genpacks
        and len(chosenpacks) < opts[b'maxrepackpacks']
    ):
        chosenpacks.append(genpacks.pop())
        repacksize += sizes[chosenpacks[-1]]

    return chosenpacks


def _runrepack(
    repo, data, history, packpath, category, fullhistory=None, options=None
):
    shallowutil.mkstickygroupdir(repo.ui, packpath)

    def isold(repo, filename, node):
        """Check if the file node is older than a limit.
        Unless a limit is specified in the config the default limit is taken.
        """
        filectx = repo.filectx(filename, fileid=node)
        filetime = repo[filectx.linkrev()].date()

        ttl = repo.ui.configint(b'remotefilelog', b'nodettl')

        limit = time.time() - ttl
        return filetime[0] < limit

    garbagecollect = repo.ui.configbool(b'remotefilelog', b'gcrepack')
    if not fullhistory:
        fullhistory = history
    packer = repacker(
        repo,
        data,
        history,
        fullhistory,
        category,
        gc=garbagecollect,
        isold=isold,
        options=options,
    )

    with datapack.mutabledatapack(repo.ui, packpath) as dpack:
        with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
            try:
                packer.run(dpack, hpack)
            except error.LockHeld:
                raise RepackAlreadyRunning(
                    _(
                        b"skipping repack - another repack "
                        b"is already running"
                    )
                )


def keepset(repo, keyfn, lastkeepkeys=None):
    """Computes a keepset which is not garbage collected.
    'keyfn' is a function that maps filename, node to a unique key.
    'lastkeepkeys' is an optional argument and if provided the keepset
    function updates lastkeepkeys with more keys and returns the result.
    """
    if not lastkeepkeys:
        keepkeys = set()
    else:
        keepkeys = lastkeepkeys

    # We want to keep:
    # 1. Working copy parent
    # 2. Draft commits
    # 3. Parents of draft commits
    # 4. Pullprefetch and bgprefetchrevs revsets if specified
    revs = [b'.', b'draft()', b'parents(draft())']
    prefetchrevs = repo.ui.config(b'remotefilelog', b'pullprefetch', None)
    if prefetchrevs:
        revs.append(b'(%s)' % prefetchrevs)
    prefetchrevs = repo.ui.config(b'remotefilelog', b'bgprefetchrevs', None)
    if prefetchrevs:
        revs.append(b'(%s)' % prefetchrevs)
    revs = b'+'.join(revs)

    revs = [b'sort((%s), "topo")' % revs]
    keep = scmutil.revrange(repo, revs)

    processed = set()
    lastmanifest = None

    # process the commits in toposorted order starting from the oldest
    for r in reversed(keep._list):
-        if repo[r].p1().rev() in processed:
-            # if the direct parent has already been processed
-            # then we only need to process the delta
-            m = repo[r].manifestctx().readdelta()
-        else:
-            # otherwise take the manifest and diff it
-            # with the previous manifest if one exists
-            if lastmanifest:
-                m = repo[r].manifest().diff(lastmanifest)
-            else:
-                m = repo[r].manifest()
-            lastmanifest = repo[r].manifest()
+        delta_from, m = repo[r].manifestctx().read_any_fast_delta(processed)
+        if delta_from is None and lastmanifest is not None:
+            # could not find a delta, compute one.
+            # XXX (is this really faster?)
+            full = m
+            if lastmanifest:
+                m = m.diff(lastmanifest)
+            lastmanifest = full
        processed.add(r)

        # populate keepkeys with keys from the current manifest
        if type(m) is dict:
            # m is a result of diff of two manifests and is a dictionary that
            # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
            for filename, diff in m.items():
                if diff[0][0] is not None:
                    keepkeys.add(keyfn(filename, diff[0][0]))
        else:
            # m is a manifest object
            for filename, filenode in m.items():
                keepkeys.add(keyfn(filename, filenode))

    return keepkeys


class repacker:
    """Class for orchestrating the repack of data and history information into a
    new format.
    """

    def __init__(
        self,
        repo,
        data,
        history,
        fullhistory,
        category,
        gc=False,
        isold=None,
        options=None,
    ):
        self.repo = repo
        self.data = data
        self.history = history
        self.fullhistory = fullhistory
        self.unit = constants.getunits(category)
        self.garbagecollect = gc
        self.options = options
        if self.garbagecollect:
            if not isold:
                raise ValueError(b"Function 'isold' is not properly specified")
            # use (filename, node) tuple as a keepset key
            self.keepkeys = keepset(repo, lambda f, n: (f, n))
            self.isold = isold

    def run(self, targetdata, targethistory):
        ledger = repackledger()

        with lockmod.lock(
            repacklockvfs(self.repo), b"repacklock", desc=None, timeout=0
        ):
            self.repo.hook(b'prerepack')

            # Populate ledger from source
            self.data.markledger(ledger, options=self.options)
            self.history.markledger(ledger, options=self.options)

            # Run repack
            self.repackdata(ledger, targetdata)
            self.repackhistory(ledger, targethistory)

            # Call cleanup on each source
            for source in ledger.sources:
                source.cleanup(ledger)

    def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
        """Reorderes ``orphans`` into a single chain inside ``nodes`` and
        ``deltabases``.

        We often have orphan entries (nodes without a base that aren't
        referenced by other nodes -- i.e., part of a chain) due to gaps in
        history. Rather than store them as individual fulltexts, we prefer to
        insert them as one chain sorted by size.
        """
        if not orphans:
            return nodes

        def getsize(node, default=0):
            meta = self.data.getmeta(filename, node)
            if constants.METAKEYSIZE in meta:
                return meta[constants.METAKEYSIZE]
            else:
                return default

        # Sort orphans by size; biggest first is preferred, since it's more
        # likely to be the newest version assuming files grow over time.
        # (Sort by node first to ensure the sort is stable.)
        orphans = sorted(orphans)
        orphans = list(sorted(orphans, key=getsize, reverse=True))
        if ui.debugflag:
            ui.debug(
                b"%s: orphan chain: %s\n"
                % (filename, b", ".join([short(s) for s in orphans]))
            )

        # Create one contiguous chain and reassign deltabases.
        for i, node in enumerate(orphans):
            if i == 0:
                deltabases[node] = (self.repo.nullid, 0)
            else:
                parent = orphans[i - 1]
                deltabases[node] = (parent, deltabases[parent][1] + 1)
        nodes = [n for n in nodes if n not in orphans]
        nodes += orphans
        return nodes

    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint(b'packs', b'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.values():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        repackprogress = ui.makeprogress(
            _(b"repacking data"), unit=self.unit, total=len(byfile)
        )
        for filename, entries in sorted(byfile.items()):
            repackprogress.update(count)

            ancestors = {}
            nodes = list(node for node in entries)
            nohistory = []
            buildprogress = ui.makeprogress(
                _(b"building history"), unit=b'nodes', total=len(nodes)
            )
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                buildprogress.update(i)
                try:
                    ancestors.update(
                        self.fullhistory.getancestors(
                            filename, node, known=ancestors
                        )
                    )
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            buildprogress.complete()

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug(
                    b'repackdata: %d nodes without history\n' % len(nohistory)
                )
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
            orderednodes = list(
                filter(lambda node: node in nodes, orderednodes)
            )

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if (filename, node) not in self.keepkeys and self.isold(
                        self.repo, filename, node
                    ):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            processprogress = ui.makeprogress(
                _(b"processing nodes"), unit=b'nodes', total=len(orderednodes)
            )
            for i, node in enumerate(orderednodes):
                processprogress.update(i)
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = self.repo.nullid, 0
                    deltabases[node] = (self.repo.nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = self.repo.nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != self.repo.nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != self.repo.nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool(b'repack', b'chainorphansbysize'):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(
                    ui, filename, orderednodes, orphans, deltabases
                )

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                if deltabase != self.repo.nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (
                        deltabasename != filename
                        or origdeltabase != deltabase
                        or size is None
                    ):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            processprogress.complete()
            count += 1

        repackprogress.complete()
        target.close(ledger=ledger)

    def repackhistory(self, ledger, target):
        ui = self.repo.ui

        byfile = {}
        for entry in ledger.entries.values():
            if entry.historysource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        progress = ui.makeprogress(
            _(b"repacking history"), unit=self.unit, total=len(byfile)
        )
        for filename, entries in sorted(byfile.items()):
            ancestors = {}
            nodes = list(node for node in entries)

            for node in nodes:
                if node in ancestors:
                    continue
                ancestors.update(
                    self.history.getancestors(filename, node, known=ancestors)
                )

            # Order the nodes children first
            orderednodes = reversed(self._toposort(ancestors))

            # Write to the pack
            dontprocess = set()
            for node in orderednodes:
                p1, p2, linknode, copyfrom = ancestors[node]

                # If the node is marked dontprocess, but it's also in the
                # explicit entries set, that means the node exists both in this
                # file and in another file that was copied to this file.
                # Usually this happens if the file was copied to another file,
                # then the copy was deleted, then reintroduced without copy
                # metadata. The original add and the new add have the same hash
                # since the content is identical and the parents are null.
                if node in dontprocess and node not in entries:
                    # If copyfrom == filename, it means the copy history
                    # went to come other file, then came back to this one, so we
                    # should continue processing it.
                    if p1 != self.repo.nullid and copyfrom != filename:
                        dontprocess.add(p1)
                    if p2 != self.repo.nullid:
                        dontprocess.add(p2)
                    continue

                if copyfrom:
                    dontprocess.add(p1)

                target.add(filename, node, p1, p2, linknode, copyfrom)

                if node in entries:
                    entries[node].historyrepacked = True

            progress.increment()

        progress.complete()
        target.close(ledger=ledger)

    def _toposort(self, ancestors):
        def parentfunc(node):
            p1, p2, linknode, copyfrom = ancestors[node]
            parents = []
            if p1 != self.repo.nullid:
                parents.append(p1)
            if p2 != self.repo.nullid:
                parents.append(p2)
            return parents

        sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
        return sortednodes


class repackledger:
    """Storage for all the bookkeeping that happens during a repack. It contains
    the list of revisions being repacked, what happened to each revision, and
    which source store contained which revision originally (for later cleanup).
    """

    def __init__(self):
        self.entries = {}
        self.sources = {}
        self.created = set()

    def markdataentry(self, source, filename, node):
        """Mark the given filename+node revision as having a data rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.datasource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def markhistoryentry(self, source, filename, node):
        """Mark the given filename+node revision as having a history rev in the
        given source.
        """
        entry = self._getorcreateentry(filename, node)
        entry.historysource = True
        entries = self.sources.get(source)
        if not entries:
            entries = set()
            self.sources[source] = entries
        entries.add(entry)

    def _getorcreateentry(self, filename, node):
        key = (filename, node)
        value = self.entries.get(key)
        if not value:
            value = repackentry(filename, node)
            self.entries[key] = value

        return value

    def addcreated(self, value):
        self.created.add(value)


class repackentry:
    """Simple class representing a single revision entry in the repackledger."""

    __slots__ = (
        'filename',
        'node',
        'datasource',
        'historysource',
        'datarepacked',
        'historyrepacked',
        'gced',
    )

    def __init__(self, filename, node):
        self.filename = filename
        self.node = node
        # If the revision has a data entry in the source
        self.datasource = False
        # If the revision has a history entry in the source
        self.historysource = False
        # If the revision's data entry was repacked into the repack target
        self.datarepacked = False
        # If the revision's history entry was repacked into the repack target
        self.historyrepacked = False
        # If garbage collected
        self.gced = False


def repacklockvfs(repo):
    if hasattr(repo, 'name'):
        # Lock in the shared cache so repacks across multiple copies of the same
        # repo are coordinated.
        sharedcachepath = shallowutil.getcachepackpath(
            repo, constants.FILEPACK_CATEGORY
        )
        return vfs.vfs(sharedcachepath)
    else:
        return repo.svfs
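
For context, this module is driven through fullrepack()/incrementalrepack() above, and the keepset()-based garbage collection only applies when remotefilelog.gcrepack is set (see repacker.__init__). A rough sketch of how an extension command might call into it follows; the wrapper function and import path are illustrative assumptions, not part of this change:

from hgext.remotefilelog import repack as repackmod


def repack_if_configured(ui, repo, incremental=True):
    # Illustrative wrapper, not part of the commit.
    # remotefilelog.gcrepack enables keepset()-based GC in repacker.__init__.
    if ui.configbool(b'remotefilelog', b'gcrepack'):
        ui.note(b'repack will garbage collect unreachable file revisions\n')
    if incremental:
        repackmod.incrementalrepack(repo)
    else:
        repackmod.fullrepack(repo)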