remotefilelog: do not specify an explicit version for repack...
Kyle Lippincott
r41971:118c1ec4 default
@@ -1,777 +1,777 b''
1 1 from __future__ import absolute_import
2 2
3 3 import os
4 4 import time
5 5
6 6 from mercurial.i18n import _
7 7 from mercurial.node import (
8 8 nullid,
9 9 short,
10 10 )
11 11 from mercurial import (
12 12 encoding,
13 13 error,
14 14 mdiff,
15 15 policy,
16 16 pycompat,
17 17 scmutil,
18 18 util,
19 19 vfs,
20 20 )
21 21 from mercurial.utils import procutil
22 22 from . import (
23 23 constants,
24 24 contentstore,
25 25 datapack,
26 26 extutil,
27 27 historypack,
28 28 metadatastore,
29 29 shallowutil,
30 30 )
31 31
32 32 osutil = policy.importmod(r'osutil')
33 33
34 34 class RepackAlreadyRunning(error.Abort):
35 35 pass
36 36
37 37 def backgroundrepack(repo, incremental=True, packsonly=False):
38 38 cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
39 39 msg = _("(running background repack)\n")
40 40 if incremental:
41 41 cmd.append('--incremental')
42 42 msg = _("(running background incremental repack)\n")
43 43 if packsonly:
44 44 cmd.append('--packsonly')
45 45 repo.ui.warn(msg)
46 46 procutil.runbgcommand(cmd, encoding.environ)
47 47
48 48 def fullrepack(repo, options=None):
49 49 """If ``packsonly`` is True, stores creating only loose objects are skipped.
50 50 """
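# Up to three passes follow: the shared file-data packs, the shared manifest
# (tree) packs, and the local manifest (tree) packs, each rewritten through
# _runrepack().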
51 51 if util.safehasattr(repo, 'shareddatastores'):
52 52 datasource = contentstore.unioncontentstore(
53 53 *repo.shareddatastores)
54 54 historysource = metadatastore.unionmetadatastore(
55 55 *repo.sharedhistorystores,
56 56 allowincomplete=True)
57 57
58 58 packpath = shallowutil.getcachepackpath(
59 59 repo,
60 60 constants.FILEPACK_CATEGORY)
61 61 _runrepack(repo, datasource, historysource, packpath,
62 62 constants.FILEPACK_CATEGORY, options=options)
63 63
64 64 if util.safehasattr(repo.manifestlog, 'datastore'):
65 65 localdata, shareddata = _getmanifeststores(repo)
66 66 lpackpath, ldstores, lhstores = localdata
67 67 spackpath, sdstores, shstores = shareddata
68 68
69 69 # Repack the shared manifest store
70 70 datasource = contentstore.unioncontentstore(*sdstores)
71 71 historysource = metadatastore.unionmetadatastore(
72 72 *shstores,
73 73 allowincomplete=True)
74 74 _runrepack(repo, datasource, historysource, spackpath,
75 75 constants.TREEPACK_CATEGORY, options=options)
76 76
77 77 # Repack the local manifest store
78 78 datasource = contentstore.unioncontentstore(
79 79 *ldstores,
80 80 allowincomplete=True)
81 81 historysource = metadatastore.unionmetadatastore(
82 82 *lhstores,
83 83 allowincomplete=True)
84 84 _runrepack(repo, datasource, historysource, lpackpath,
85 85 constants.TREEPACK_CATEGORY, options=options)
86 86
87 87 def incrementalrepack(repo, options=None):
88 88 """This repacks the repo by looking at the distribution of pack files in the
89 89 repo and performing the most minimal repack to keep the repo in good shape.
90 90 """
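# "Most minimal" means only a bounded subset of packs is rewritten:
# _computeincrementalpack() picks packs from the largest generation whose
# pack count exceeds gencountlimit.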
91 91 if util.safehasattr(repo, 'shareddatastores'):
92 92 packpath = shallowutil.getcachepackpath(
93 93 repo,
94 94 constants.FILEPACK_CATEGORY)
95 95 _incrementalrepack(repo,
96 96 repo.shareddatastores,
97 97 repo.sharedhistorystores,
98 98 packpath,
99 99 constants.FILEPACK_CATEGORY,
100 100 options=options)
101 101
102 102 if util.safehasattr(repo.manifestlog, 'datastore'):
103 103 localdata, shareddata = _getmanifeststores(repo)
104 104 lpackpath, ldstores, lhstores = localdata
105 105 spackpath, sdstores, shstores = shareddata
106 106
107 107 # Repack the shared manifest store
108 108 _incrementalrepack(repo,
109 109 sdstores,
110 110 shstores,
111 111 spackpath,
112 112 constants.TREEPACK_CATEGORY,
113 113 options=options)
114 114
115 115 # Repack the local manifest store
116 116 _incrementalrepack(repo,
117 117 ldstores,
118 118 lhstores,
119 119 lpackpath,
120 120 constants.TREEPACK_CATEGORY,
121 121 allowincompletedata=True,
122 122 options=options)
123 123
124 124 def _getmanifeststores(repo):
125 125 shareddatastores = repo.manifestlog.shareddatastores
126 126 localdatastores = repo.manifestlog.localdatastores
127 127 sharedhistorystores = repo.manifestlog.sharedhistorystores
128 128 localhistorystores = repo.manifestlog.localhistorystores
129 129
130 130 sharedpackpath = shallowutil.getcachepackpath(repo,
131 131 constants.TREEPACK_CATEGORY)
132 132 localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
133 133 constants.TREEPACK_CATEGORY)
134 134
135 135 return ((localpackpath, localdatastores, localhistorystores),
136 136 (sharedpackpath, shareddatastores, sharedhistorystores))
137 137
138 138 def _topacks(packpath, files, constructor):
139 139 paths = list(os.path.join(packpath, p) for p in files)
140 140 packs = list(constructor(p) for p in paths)
141 141 return packs
142 142
143 143 def _deletebigpacks(repo, folder, files):
144 144 """Deletes packfiles that are bigger than ``packs.maxpacksize``.
145 145
146 146 Returns ``files`` with the removed files omitted."""
147 147 maxsize = repo.ui.configbytes("packs", "maxpacksize")
148 148 if maxsize <= 0:
149 149 return files
150 150
151 151 # This only considers datapacks today, but we could broaden it to include
152 152 # historypacks.
153 153 VALIDEXTS = [".datapack", ".dataidx"]
154 154
155 155 # Either an oversize index or datapack will trigger cleanup of the whole
156 156 # pack:
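# (e.g. an oversize foo.dataidx causes both foo.dataidx and foo.datapack to
# be unlinked; 'foo' is an illustrative name)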
157 157 oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
158 158 if (stat.st_size > maxsize and (os.path.splitext(path)[1]
159 159 in VALIDEXTS))])
160 160
161 161 for rootfname in oversized:
162 162 rootpath = os.path.join(folder, rootfname)
163 163 for ext in VALIDEXTS:
164 164 path = rootpath + ext
165 165 repo.ui.debug('removing oversize packfile %s (%s)\n' %
166 166 (path, util.bytecount(os.stat(path).st_size)))
167 167 os.unlink(path)
168 168 return [row for row in files if os.path.basename(row[0]) not in oversized]
169 169
170 170 def _incrementalrepack(repo, datastore, historystore, packpath, category,
171 171 allowincompletedata=False, options=None):
172 172 shallowutil.mkstickygroupdir(repo.ui, packpath)
173 173
174 174 files = osutil.listdir(packpath, stat=True)
175 175 files = _deletebigpacks(repo, packpath, files)
176 176 datapacks = _topacks(packpath,
177 177 _computeincrementaldatapack(repo.ui, files),
178 178 datapack.datapack)
179 179 datapacks.extend(s for s in datastore
180 180 if not isinstance(s, datapack.datapackstore))
181 181
182 182 historypacks = _topacks(packpath,
183 183 _computeincrementalhistorypack(repo.ui, files),
184 184 historypack.historypack)
185 185 historypacks.extend(s for s in historystore
186 186 if not isinstance(s, historypack.historypackstore))
187 187
188 188 # ``allhistory{files,packs}`` contains all known history packs, even ones we
189 189 # don't plan to repack. They are used during the datapack repack to ensure
190 190 # good ordering of nodes.
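# The history packs that are not selected for rewriting still contribute
# ancestry information: they are passed to _runrepack() as ``fullhistory``
# and consulted when ordering data nodes in repackdata().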
191 191 allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
192 192 historypack.INDEXSUFFIX)
193 193 allhistorypacks = _topacks(packpath,
194 194 (f for f, mode, stat in allhistoryfiles),
195 195 historypack.historypack)
196 196 allhistorypacks.extend(s for s in historystore
197 197 if not isinstance(s, historypack.historypackstore))
198 198 _runrepack(repo,
199 199 contentstore.unioncontentstore(
200 200 *datapacks,
201 201 allowincomplete=allowincompletedata),
202 202 metadatastore.unionmetadatastore(
203 203 *historypacks,
204 204 allowincomplete=True),
205 205 packpath, category,
206 206 fullhistory=metadatastore.unionmetadatastore(
207 207 *allhistorypacks,
208 208 allowincomplete=True),
209 209 options=options)
210 210
211 211 def _computeincrementaldatapack(ui, files):
212 212 opts = {
213 213 'gencountlimit' : ui.configint(
214 214 'remotefilelog', 'data.gencountlimit'),
215 215 'generations' : ui.configlist(
216 216 'remotefilelog', 'data.generations'),
217 217 'maxrepackpacks' : ui.configint(
218 218 'remotefilelog', 'data.maxrepackpacks'),
219 219 'repackmaxpacksize' : ui.configbytes(
220 220 'remotefilelog', 'data.repackmaxpacksize'),
221 221 'repacksizelimit' : ui.configbytes(
222 222 'remotefilelog', 'data.repacksizelimit'),
223 223 }
224 224
225 225 packfiles = _allpackfileswithsuffix(
226 226 files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
227 227 return _computeincrementalpack(packfiles, opts)
228 228
229 229 def _computeincrementalhistorypack(ui, files):
230 230 opts = {
231 231 'gencountlimit' : ui.configint(
232 232 'remotefilelog', 'history.gencountlimit'),
233 233 'generations' : ui.configlist(
234 234 'remotefilelog', 'history.generations', ['100MB']),
235 235 'maxrepackpacks' : ui.configint(
236 236 'remotefilelog', 'history.maxrepackpacks'),
237 237 'repackmaxpacksize' : ui.configbytes(
238 238 'remotefilelog', 'history.repackmaxpacksize', '400MB'),
239 239 'repacksizelimit' : ui.configbytes(
240 240 'remotefilelog', 'history.repacksizelimit'),
241 241 }
242 242
243 243 packfiles = _allpackfileswithsuffix(
244 244 files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
245 245 return _computeincrementalpack(packfiles, opts)
246 246
247 247 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
248 248 result = []
249 249 fileset = set(fn for fn, mode, stat in files)
250 250 for filename, mode, stat in files:
251 251 if not filename.endswith(packsuffix):
252 252 continue
253 253
254 254 prefix = filename[:-len(packsuffix)]
255 255
256 256 # Don't process a pack if it doesn't have an index.
257 257 if (prefix + indexsuffix) not in fileset:
258 258 continue
259 259 result.append((prefix, mode, stat))
260 260
261 261 return result
262 262
263 263 def _computeincrementalpack(files, opts):
264 264 """Given a set of pack files along with the configuration options, this
265 265 function computes the list of files that should be packed as part of an
266 266 incremental repack.
267 267
268 268 It tries to strike a balance between keeping incremental repacks cheap (i.e.
269 269 packing small things when possible) and rolling the packs up to the big ones
270 270 over time.
271 271 """
272 272
273 273 limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
274 274 reverse=True))
275 275 limits.append(0)
276 276
277 277 # Group the packs by generation (i.e. by size)
278 278 generations = []
279 279 for i in pycompat.xrange(len(limits)):
280 280 generations.append([])
281 281
282 282 sizes = {}
283 283 for prefix, mode, stat in files:
284 284 size = stat.st_size
285 285 if size > opts['repackmaxpacksize']:
286 286 continue
287 287
288 288 sizes[prefix] = size
289 289 for i, limit in enumerate(limits):
290 290 if size > limit:
291 291 generations[i].append(prefix)
292 292 break
293 293
294 294 # Steps for picking what packs to repack:
295 295 # 1. Pick the largest generation with > gencountlimit pack files.
296 296 # 2. Take the smallest three packs.
297 297 # 3. While total-size-of-packs < repacksizelimit: add another pack
298 298
299 299 # Find the largest generation with more than gencountlimit packs
300 300 genpacks = []
301 301 for i, limit in enumerate(limits):
302 302 if len(generations[i]) > opts['gencountlimit']:
303 303 # Sort to be smallest last, for easy popping later
304 304 genpacks.extend(sorted(generations[i], reverse=True,
305 305 key=lambda x: sizes[x]))
306 306 break
307 307
308 308 # Take as many packs from the generation as we can
309 309 chosenpacks = genpacks[-3:]
310 310 genpacks = genpacks[:-3]
311 311 repacksize = sum(sizes[n] for n in chosenpacks)
312 312 while (repacksize < opts['repacksizelimit'] and genpacks and
313 313 len(chosenpacks) < opts['maxrepackpacks']):
314 314 chosenpacks.append(genpacks.pop())
315 315 repacksize += sizes[chosenpacks[-1]]
316 316
317 317 return chosenpacks
318 318
319 319 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
320 320 options=None):
321 321 shallowutil.mkstickygroupdir(repo.ui, packpath)
322 322
323 323 def isold(repo, filename, node):
324 324 """Check if the file node is older than a limit.
325 325 Unless a limit is specified in the config the default limit is taken.
326 326 """
327 327 filectx = repo.filectx(filename, fileid=node)
328 328 filetime = repo[filectx.linkrev()].date()
329 329
330 330 ttl = repo.ui.configint('remotefilelog', 'nodettl')
331 331
332 332 limit = time.time() - ttl
333 333 return filetime[0] < limit
334 334
335 335 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
336 336 if not fullhistory:
337 337 fullhistory = history
338 338 packer = repacker(repo, data, history, fullhistory, category,
339 339 gc=garbagecollect, isold=isold, options=options)
340 340
341 - with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
341 + with datapack.mutabledatapack(repo.ui, packpath) as dpack:
342 342 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
343 343 try:
344 344 packer.run(dpack, hpack)
345 345 except error.LockHeld:
346 346 raise RepackAlreadyRunning(_("skipping repack - another repack "
347 347 "is already running"))
348 348
349 349 def keepset(repo, keyfn, lastkeepkeys=None):
350 350 """Computes a keepset which is not garbage collected.
351 351 'keyfn' is a function that maps filename, node to a unique key.
352 352 'lastkeepkeys' is an optional argument and if provided the keepset
353 353 function updates lastkeepkeys with more keys and returns the result.
354 354 """
355 355 if not lastkeepkeys:
356 356 keepkeys = set()
357 357 else:
358 358 keepkeys = lastkeepkeys
359 359
360 360 # We want to keep:
361 361 # 1. Working copy parent
362 362 # 2. Draft commits
363 363 # 3. Parents of draft commits
364 364 # 4. Pullprefetch and bgprefetchrevs revsets if specified
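# With both prefetch configs set, the expression built below is roughly
# sort((. + draft() + parents(draft()) + (<pullprefetch>) +
# (<bgprefetchrevs>)), "topo"), where the placeholders stand for the
# configured revsets.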
365 365 revs = ['.', 'draft()', 'parents(draft())']
366 366 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
367 367 if prefetchrevs:
368 368 revs.append('(%s)' % prefetchrevs)
369 369 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
370 370 if prefetchrevs:
371 371 revs.append('(%s)' % prefetchrevs)
372 372 revs = '+'.join(revs)
373 373
374 374 revs = ['sort((%s), "topo")' % revs]
375 375 keep = scmutil.revrange(repo, revs)
376 376
377 377 processed = set()
378 378 lastmanifest = None
379 379
380 380 # process the commits in toposorted order starting from the oldest
381 381 for r in reversed(keep._list):
382 382 if repo[r].p1().rev() in processed:
383 383 # if the direct parent has already been processed
384 384 # then we only need to process the delta
385 385 m = repo[r].manifestctx().readdelta()
386 386 else:
387 387 # otherwise take the manifest and diff it
388 388 # with the previous manifest if one exists
389 389 if lastmanifest:
390 390 m = repo[r].manifest().diff(lastmanifest)
391 391 else:
392 392 m = repo[r].manifest()
393 393 lastmanifest = repo[r].manifest()
394 394 processed.add(r)
395 395
396 396 # populate keepkeys with keys from the current manifest
397 397 if type(m) is dict:
398 398 # m is a result of diff of two manifests and is a dictionary that
399 399 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
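# A newnode of None means the file is absent from this commit's manifest
# (e.g. it was removed), so no key is added for it below.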
400 400 for filename, diff in m.iteritems():
401 401 if diff[0][0] is not None:
402 402 keepkeys.add(keyfn(filename, diff[0][0]))
403 403 else:
404 404 # m is a manifest object
405 405 for filename, filenode in m.iteritems():
406 406 keepkeys.add(keyfn(filename, filenode))
407 407
408 408 return keepkeys
409 409
410 410 class repacker(object):
411 411 """Class for orchestrating the repack of data and history information into a
412 412 new format.
413 413 """
414 414 def __init__(self, repo, data, history, fullhistory, category, gc=False,
415 415 isold=None, options=None):
416 416 self.repo = repo
417 417 self.data = data
418 418 self.history = history
419 419 self.fullhistory = fullhistory
420 420 self.unit = constants.getunits(category)
421 421 self.garbagecollect = gc
422 422 self.options = options
423 423 if self.garbagecollect:
424 424 if not isold:
425 425 raise ValueError("Function 'isold' is not properly specified")
426 426 # use (filename, node) tuple as a keepset key
427 427 self.keepkeys = keepset(repo, lambda f, n : (f, n))
428 428 self.isold = isold
429 429
430 430 def run(self, targetdata, targethistory):
431 431 ledger = repackledger()
432 432
433 433 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
434 434 _('repacking %s') % self.repo.origroot, timeout=0):
435 435 self.repo.hook('prerepack')
436 436
437 437 # Populate ledger from source
438 438 self.data.markledger(ledger, options=self.options)
439 439 self.history.markledger(ledger, options=self.options)
440 440
441 441 # Run repack
442 442 self.repackdata(ledger, targetdata)
443 443 self.repackhistory(ledger, targethistory)
444 444
445 445 # Call cleanup on each source
446 446 for source in ledger.sources:
447 447 source.cleanup(ledger)
448 448
449 449 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
450 450 """Reorderes ``orphans`` into a single chain inside ``nodes`` and
451 451 ``deltabases``.
452 452
453 453 We often have orphan entries (nodes without a base that aren't
454 454 referenced by other nodes -- i.e., part of a chain) due to gaps in
455 455 history. Rather than store them as individual fulltexts, we prefer to
456 456 insert them as one chain sorted by size.
457 457 """
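# Illustration: for orphans [a, b, c], largest first after the sort below,
# a gets nullid as its delta base (i.e. it is stored as a fulltext) while b
# and c are each based on the previous node in the chain.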
458 458 if not orphans:
459 459 return nodes
460 460
461 461 def getsize(node, default=0):
462 462 meta = self.data.getmeta(filename, node)
463 463 if constants.METAKEYSIZE in meta:
464 464 return meta[constants.METAKEYSIZE]
465 465 else:
466 466 return default
467 467
468 468 # Sort orphans by size; biggest first is preferred, since it's more
469 469 # likely to be the newest version assuming files grow over time.
470 470 # (Sort by node first to ensure the sort is stable.)
471 471 orphans = sorted(orphans)
472 472 orphans = list(sorted(orphans, key=getsize, reverse=True))
473 473 if ui.debugflag:
474 474 ui.debug("%s: orphan chain: %s\n" % (filename,
475 475 ", ".join([short(s) for s in orphans])))
476 476
477 477 # Create one contiguous chain and reassign deltabases.
478 478 for i, node in enumerate(orphans):
479 479 if i == 0:
480 480 deltabases[node] = (nullid, 0)
481 481 else:
482 482 parent = orphans[i - 1]
483 483 deltabases[node] = (parent, deltabases[parent][1] + 1)
484 484 nodes = [n for n in nodes if n not in orphans]
485 485 nodes += orphans
486 486 return nodes
487 487
488 488 def repackdata(self, ledger, target):
489 489 ui = self.repo.ui
490 490 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
491 491
492 492 byfile = {}
493 493 for entry in ledger.entries.itervalues():
494 494 if entry.datasource:
495 495 byfile.setdefault(entry.filename, {})[entry.node] = entry
496 496
497 497 count = 0
498 498 repackprogress = ui.makeprogress(_("repacking data"), unit=self.unit,
499 499 total=len(byfile))
500 500 for filename, entries in sorted(byfile.iteritems()):
501 501 repackprogress.update(count)
502 502
503 503 ancestors = {}
504 504 nodes = list(node for node in entries)
505 505 nohistory = []
506 506 buildprogress = ui.makeprogress(_("building history"), unit='nodes',
507 507 total=len(nodes))
508 508 for i, node in enumerate(nodes):
509 509 if node in ancestors:
510 510 continue
511 511 buildprogress.update(i)
512 512 try:
513 513 ancestors.update(self.fullhistory.getancestors(filename,
514 514 node, known=ancestors))
515 515 except KeyError:
516 516 # Since we're packing data entries, we may not have the
517 517 # corresponding history entries for them. It's not a big
518 518 # deal, but the entries won't be delta'd perfectly.
519 519 nohistory.append(node)
520 520 buildprogress.complete()
521 521
522 522 # Order the nodes children first, so we can produce reverse deltas
523 523 orderednodes = list(reversed(self._toposort(ancestors)))
524 524 if len(nohistory) > 0:
525 525 ui.debug('repackdata: %d nodes without history\n' %
526 526 len(nohistory))
527 527 orderednodes.extend(sorted(nohistory))
528 528
529 529 # Filter orderednodes to just the nodes we want to serialize (it
530 530 # currently also has the edge nodes' ancestors).
531 531 orderednodes = list(filter(lambda node: node in nodes,
532 532 orderednodes))
533 533
534 534 # Garbage collect old nodes:
535 535 if self.garbagecollect:
536 536 neworderednodes = []
537 537 for node in orderednodes:
538 538 # If the node is old and is not in the keepset, we skip it,
539 539 # and mark as garbage collected
540 540 if ((filename, node) not in self.keepkeys and
541 541 self.isold(self.repo, filename, node)):
542 542 entries[node].gced = True
543 543 continue
544 544 neworderednodes.append(node)
545 545 orderednodes = neworderednodes
546 546
547 547 # Compute delta bases for nodes:
548 548 deltabases = {}
549 549 nobase = set()
550 550 referenced = set()
551 551 nodes = set(nodes)
552 552 processprogress = ui.makeprogress(_("processing nodes"),
553 553 unit='nodes',
554 554 total=len(orderednodes))
555 555 for i, node in enumerate(orderednodes):
556 556 processprogress.update(i)
557 557 # Find delta base
558 558 # TODO: allow delta'ing against most recent descendant instead
559 559 # of immediate child
560 560 deltatuple = deltabases.get(node, None)
561 561 if deltatuple is None:
562 562 deltabase, chainlen = nullid, 0
563 563 deltabases[node] = (nullid, 0)
564 564 nobase.add(node)
565 565 else:
566 566 deltabase, chainlen = deltatuple
567 567 referenced.add(deltabase)
568 568
569 569 # Use available ancestor information to inform our delta choices
570 570 ancestorinfo = ancestors.get(node)
571 571 if ancestorinfo:
572 572 p1, p2, linknode, copyfrom = ancestorinfo
573 573
574 574 # The presence of copyfrom means we're at a point where the
575 575 # file was copied from elsewhere. So don't attempt to do any
576 576 # deltas with the other file.
577 577 if copyfrom:
578 578 p1 = nullid
579 579
580 580 if chainlen < maxchainlen:
581 581 # Record this child as the delta base for its parents.
582 582 # This may be non optimal, since the parents may have
583 583 # many children, and this will only choose the last one.
584 584 # TODO: record all children and try all deltas to find
585 585 # best
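# Illustration: since orderednodes is children-first, the parents of the
# node processed here will later be stored as reverse deltas against this
# newer version.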
586 586 if p1 != nullid:
587 587 deltabases[p1] = (node, chainlen + 1)
588 588 if p2 != nullid:
589 589 deltabases[p2] = (node, chainlen + 1)
590 590
591 591 # experimental config: repack.chainorphansbysize
592 592 if ui.configbool('repack', 'chainorphansbysize'):
593 593 orphans = nobase - referenced
594 594 orderednodes = self._chainorphans(ui, filename, orderednodes,
595 595 orphans, deltabases)
596 596
597 597 # Compute deltas and write to the pack
598 598 for i, node in enumerate(orderednodes):
599 599 deltabase, chainlen = deltabases[node]
600 600 # Compute delta
601 601 # TODO: Optimize the deltachain fetching. Since we're
602 602 # iterating over the different version of the file, we may
603 603 # be fetching the same deltachain over and over again.
604 604 if deltabase != nullid:
605 605 deltaentry = self.data.getdelta(filename, node)
606 606 delta, deltabasename, origdeltabase, meta = deltaentry
607 607 size = meta.get(constants.METAKEYSIZE)
608 608 if (deltabasename != filename or origdeltabase != deltabase
609 609 or size is None):
610 610 deltabasetext = self.data.get(filename, deltabase)
611 611 original = self.data.get(filename, node)
612 612 size = len(original)
613 613 delta = mdiff.textdiff(deltabasetext, original)
614 614 else:
615 615 delta = self.data.get(filename, node)
616 616 size = len(delta)
617 617 meta = self.data.getmeta(filename, node)
618 618
619 619 # TODO: don't use the delta if it's larger than the fulltext
620 620 if constants.METAKEYSIZE not in meta:
621 621 meta[constants.METAKEYSIZE] = size
622 622 target.add(filename, node, deltabase, delta, meta)
623 623
624 624 entries[node].datarepacked = True
625 625
626 626 processprogress.complete()
627 627 count += 1
628 628
629 629 repackprogress.complete()
630 630 target.close(ledger=ledger)
631 631
632 632 def repackhistory(self, ledger, target):
633 633 ui = self.repo.ui
634 634
635 635 byfile = {}
636 636 for entry in ledger.entries.itervalues():
637 637 if entry.historysource:
638 638 byfile.setdefault(entry.filename, {})[entry.node] = entry
639 639
640 640 progress = ui.makeprogress(_("repacking history"), unit=self.unit,
641 641 total=len(byfile))
642 642 for filename, entries in sorted(byfile.iteritems()):
643 643 ancestors = {}
644 644 nodes = list(node for node in entries)
645 645
646 646 for node in nodes:
647 647 if node in ancestors:
648 648 continue
649 649 ancestors.update(self.history.getancestors(filename, node,
650 650 known=ancestors))
651 651
652 652 # Order the nodes children first
653 653 orderednodes = reversed(self._toposort(ancestors))
654 654
655 655 # Write to the pack
656 656 dontprocess = set()
657 657 for node in orderednodes:
658 658 p1, p2, linknode, copyfrom = ancestors[node]
659 659
660 660 # If the node is marked dontprocess, but it's also in the
661 661 # explicit entries set, that means the node exists both in this
662 662 # file and in another file that was copied to this file.
663 663 # Usually this happens if the file was copied to another file,
664 664 # then the copy was deleted, then reintroduced without copy
665 665 # metadata. The original add and the new add have the same hash
666 666 # since the content is identical and the parents are null.
667 667 if node in dontprocess and node not in entries:
668 668 # If copyfrom == filename, it means the copy history
669 669 # went to some other file, then came back to this one, so we
670 670 # should continue processing it.
671 671 if p1 != nullid and copyfrom != filename:
672 672 dontprocess.add(p1)
673 673 if p2 != nullid:
674 674 dontprocess.add(p2)
675 675 continue
676 676
677 677 if copyfrom:
678 678 dontprocess.add(p1)
679 679
680 680 target.add(filename, node, p1, p2, linknode, copyfrom)
681 681
682 682 if node in entries:
683 683 entries[node].historyrepacked = True
684 684
685 685 progress.increment()
686 686
687 687 progress.complete()
688 688 target.close(ledger=ledger)
689 689
690 690 def _toposort(self, ancestors):
691 691 def parentfunc(node):
692 692 p1, p2, linknode, copyfrom = ancestors[node]
693 693 parents = []
694 694 if p1 != nullid:
695 695 parents.append(p1)
696 696 if p2 != nullid:
697 697 parents.append(p2)
698 698 return parents
699 699
700 700 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
701 701 return sortednodes
702 702
703 703 class repackledger(object):
704 704 """Storage for all the bookkeeping that happens during a repack. It contains
705 705 the list of revisions being repacked, what happened to each revision, and
706 706 which source store contained which revision originally (for later cleanup).
707 707 """
708 708 def __init__(self):
709 709 self.entries = {}
710 710 self.sources = {}
711 711 self.created = set()
712 712
713 713 def markdataentry(self, source, filename, node):
714 714 """Mark the given filename+node revision as having a data rev in the
715 715 given source.
716 716 """
717 717 entry = self._getorcreateentry(filename, node)
718 718 entry.datasource = True
719 719 entries = self.sources.get(source)
720 720 if not entries:
721 721 entries = set()
722 722 self.sources[source] = entries
723 723 entries.add(entry)
724 724
725 725 def markhistoryentry(self, source, filename, node):
726 726 """Mark the given filename+node revision as having a history rev in the
727 727 given source.
728 728 """
729 729 entry = self._getorcreateentry(filename, node)
730 730 entry.historysource = True
731 731 entries = self.sources.get(source)
732 732 if not entries:
733 733 entries = set()
734 734 self.sources[source] = entries
735 735 entries.add(entry)
736 736
737 737 def _getorcreateentry(self, filename, node):
738 738 key = (filename, node)
739 739 value = self.entries.get(key)
740 740 if not value:
741 741 value = repackentry(filename, node)
742 742 self.entries[key] = value
743 743
744 744 return value
745 745
746 746 def addcreated(self, value):
747 747 self.created.add(value)
748 748
749 749 class repackentry(object):
750 750 """Simple class representing a single revision entry in the repackledger.
751 751 """
752 752 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
753 753 r'datarepacked', r'historyrepacked', r'gced')
754 754 def __init__(self, filename, node):
755 755 self.filename = filename
756 756 self.node = node
757 757 # If the revision has a data entry in the source
758 758 self.datasource = False
759 759 # If the revision has a history entry in the source
760 760 self.historysource = False
761 761 # If the revision's data entry was repacked into the repack target
762 762 self.datarepacked = False
763 763 # If the revision's history entry was repacked into the repack target
764 764 self.historyrepacked = False
765 765 # If garbage collected
766 766 self.gced = False
767 767
768 768 def repacklockvfs(repo):
769 769 if util.safehasattr(repo, 'name'):
770 770 # Lock in the shared cache so repacks across multiple copies of the same
771 771 # repo are coordinated.
772 772 sharedcachepath = shallowutil.getcachepackpath(
773 773 repo,
774 774 constants.FILEPACK_CATEGORY)
775 775 return vfs.vfs(sharedcachepath)
776 776 else:
777 777 return repo.svfs