remotefilelog: drop some compatibility cruft for finding the hg executable
Matt Harbison -
r40747:72d88a97 default
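The change below drops a safehasattr() shim that looked for hgexecutable() in the util module on Mercurial versions predating 5be286db, and calls procutil.hgexecutable() directly instead. A condensed before/after sketch of the pattern, using names taken from the diff:

    # Before: probe for the pre-5be286db location of hgexecutable().
    if util.safehasattr(util, '_hgexecutable'):
        _hgexecutable = util.hgexecutable
    else:
        from mercurial.utils import procutil
        _hgexecutable = procutil.hgexecutable
    cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']

    # After: call procutil.hgexecutable() directly; the compatibility probe
    # is no longer needed.
    from mercurial.utils import procutil
    cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']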
@@ -1,781 +1,774 b''
1 1 from __future__ import absolute_import
2 2
3 3 import os
4 4 import time
5 5
6 6 from mercurial.i18n import _
7 7 from mercurial.node import (
8 8 nullid,
9 9 short,
10 10 )
11 11 from mercurial import (
12 12 encoding,
13 13 error,
14 14 mdiff,
15 15 policy,
16 16 pycompat,
17 17 scmutil,
18 18 util,
19 19 vfs,
20 20 )
21 21 from mercurial.utils import procutil
22 22 from . import (
23 23 constants,
24 24 contentstore,
25 25 datapack,
26 26 extutil,
27 27 historypack,
28 28 metadatastore,
29 29 shallowutil,
30 30 )
31 31
32 32 osutil = policy.importmod(r'osutil')
33 33
34 34 class RepackAlreadyRunning(error.Abort):
35 35 pass
36 36
37 if util.safehasattr(util, '_hgexecutable'):
38 # Before 5be286db
39 _hgexecutable = util.hgexecutable
40 else:
41 from mercurial.utils import procutil
42 _hgexecutable = procutil.hgexecutable
43
44 37 def backgroundrepack(repo, incremental=True, packsonly=False):
45 cmd = [_hgexecutable(), '-R', repo.origroot, 'repack']
38 cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
46 39 msg = _("(running background repack)\n")
47 40 if incremental:
48 41 cmd.append('--incremental')
49 42 msg = _("(running background incremental repack)\n")
50 43 if packsonly:
51 44 cmd.append('--packsonly')
52 45 repo.ui.warn(msg)
53 46 procutil.runbgcommand(cmd, encoding.environ)
54 47
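# Illustration (hypothetical hg path and repo root, not part of the original
# file): with incremental=True and packsonly=True, backgroundrepack() above
# builds a command roughly like the list below and hands it to
# procutil.runbgcommand(), so the triggering operation does not wait for the
# repack to finish.
cmd = ['hg', '-R', '/path/to/repo', 'repack', '--incremental', '--packsonly']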
55 48 def fullrepack(repo, options=None):
56 49 """If ``packsonly`` is True, stores creating only loose objects are skipped.
57 50 """
58 51 if util.safehasattr(repo, 'shareddatastores'):
59 52 datasource = contentstore.unioncontentstore(
60 53 *repo.shareddatastores)
61 54 historysource = metadatastore.unionmetadatastore(
62 55 *repo.sharedhistorystores,
63 56 allowincomplete=True)
64 57
65 58 packpath = shallowutil.getcachepackpath(
66 59 repo,
67 60 constants.FILEPACK_CATEGORY)
68 61 _runrepack(repo, datasource, historysource, packpath,
69 62 constants.FILEPACK_CATEGORY, options=options)
70 63
71 64 if util.safehasattr(repo.manifestlog, 'datastore'):
72 65 localdata, shareddata = _getmanifeststores(repo)
73 66 lpackpath, ldstores, lhstores = localdata
74 67 spackpath, sdstores, shstores = shareddata
75 68
76 69 # Repack the shared manifest store
77 70 datasource = contentstore.unioncontentstore(*sdstores)
78 71 historysource = metadatastore.unionmetadatastore(
79 72 *shstores,
80 73 allowincomplete=True)
81 74 _runrepack(repo, datasource, historysource, spackpath,
82 75 constants.TREEPACK_CATEGORY, options=options)
83 76
84 77 # Repack the local manifest store
85 78 datasource = contentstore.unioncontentstore(
86 79 *ldstores,
87 80 allowincomplete=True)
88 81 historysource = metadatastore.unionmetadatastore(
89 82 *lhstores,
90 83 allowincomplete=True)
91 84 _runrepack(repo, datasource, historysource, lpackpath,
92 85 constants.TREEPACK_CATEGORY, options=options)
93 86
94 87 def incrementalrepack(repo, options=None):
95 88 """This repacks the repo by looking at the distribution of pack files in the
96 89 repo and performing the most minimal repack to keep the repo in good shape.
97 90 """
98 91 if util.safehasattr(repo, 'shareddatastores'):
99 92 packpath = shallowutil.getcachepackpath(
100 93 repo,
101 94 constants.FILEPACK_CATEGORY)
102 95 _incrementalrepack(repo,
103 96 repo.shareddatastores,
104 97 repo.sharedhistorystores,
105 98 packpath,
106 99 constants.FILEPACK_CATEGORY,
107 100 options=options)
108 101
109 102 if util.safehasattr(repo.manifestlog, 'datastore'):
110 103 localdata, shareddata = _getmanifeststores(repo)
111 104 lpackpath, ldstores, lhstores = localdata
112 105 spackpath, sdstores, shstores = shareddata
113 106
114 107 # Repack the shared manifest store
115 108 _incrementalrepack(repo,
116 109 sdstores,
117 110 shstores,
118 111 spackpath,
119 112 constants.TREEPACK_CATEGORY,
120 113 options=options)
121 114
122 115 # Repack the local manifest store
123 116 _incrementalrepack(repo,
124 117 ldstores,
125 118 lhstores,
126 119 lpackpath,
127 120 constants.TREEPACK_CATEGORY,
128 121 allowincompletedata=True,
129 122 options=options)
130 123
131 124 def _getmanifeststores(repo):
132 125 shareddatastores = repo.manifestlog.shareddatastores
133 126 localdatastores = repo.manifestlog.localdatastores
134 127 sharedhistorystores = repo.manifestlog.sharedhistorystores
135 128 localhistorystores = repo.manifestlog.localhistorystores
136 129
137 130 sharedpackpath = shallowutil.getcachepackpath(repo,
138 131 constants.TREEPACK_CATEGORY)
139 132 localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
140 133 constants.TREEPACK_CATEGORY)
141 134
142 135 return ((localpackpath, localdatastores, localhistorystores),
143 136 (sharedpackpath, shareddatastores, sharedhistorystores))
144 137
145 138 def _topacks(packpath, files, constructor):
146 139 paths = list(os.path.join(packpath, p) for p in files)
147 140 packs = list(constructor(p) for p in paths)
148 141 return packs
149 142
150 143 def _deletebigpacks(repo, folder, files):
151 144 """Deletes packfiles that are bigger than ``packs.maxpacksize``.
152 145
153 146 Returns ``files`` with the removed files omitted."""
154 147 maxsize = repo.ui.configbytes("packs", "maxpacksize")
155 148 if maxsize <= 0:
156 149 return files
157 150
158 151 # This only considers datapacks today, but we could broaden it to include
159 152 # historypacks.
160 153 VALIDEXTS = [".datapack", ".dataidx"]
161 154
162 155 # Either an oversize index or datapack will trigger cleanup of the whole
163 156 # pack:
164 157 oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
165 158 if (stat.st_size > maxsize and (os.path.splitext(path)[1]
166 159 in VALIDEXTS))])
167 160
168 161 for rootfname in oversized:
169 162 rootpath = os.path.join(folder, rootfname)
170 163 for ext in VALIDEXTS:
171 164 path = rootpath + ext
172 165 repo.ui.debug('removing oversize packfile %s (%s)\n' %
173 166 (path, util.bytecount(os.stat(path).st_size)))
174 167 os.unlink(path)
175 168 return [row for row in files if os.path.basename(row[0]) not in oversized]
176 169
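# Illustration (hypothetical names and sizes): with packs.maxpacksize = 1GB,
# an oversized .datapack or .dataidx marks the whole pack for cleanup in
# _deletebigpacks() above, and both files sharing that prefix are unlinked.
import os
maxsize = 1 << 30
sizes = {'abcdef.datapack': 14 << 27,   # ~1.75 GB: over the limit
         'abcdef.dataidx': 30 << 20,    # 30 MB
         '012345.datapack': 200 << 20}  # 200 MB: kept
oversized = set(os.path.splitext(name)[0] for name, size in sizes.items()
                if size > maxsize
                and os.path.splitext(name)[1] in ('.datapack', '.dataidx'))
# oversized == {'abcdef'}; both abcdef.datapack and abcdef.dataidx would then
# be os.unlink()ed before the incremental repack groups the remaining packs.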
177 170 def _incrementalrepack(repo, datastore, historystore, packpath, category,
178 171 allowincompletedata=False, options=None):
179 172 shallowutil.mkstickygroupdir(repo.ui, packpath)
180 173
181 174 files = osutil.listdir(packpath, stat=True)
182 175 files = _deletebigpacks(repo, packpath, files)
183 176 datapacks = _topacks(packpath,
184 177 _computeincrementaldatapack(repo.ui, files),
185 178 datapack.datapack)
186 179 datapacks.extend(s for s in datastore
187 180 if not isinstance(s, datapack.datapackstore))
188 181
189 182 historypacks = _topacks(packpath,
190 183 _computeincrementalhistorypack(repo.ui, files),
191 184 historypack.historypack)
192 185 historypacks.extend(s for s in historystore
193 186 if not isinstance(s, historypack.historypackstore))
194 187
195 188 # ``allhistory{files,packs}`` contains all known history packs, even ones we
196 189 # don't plan to repack. They are used during the datapack repack to ensure
197 190 # good ordering of nodes.
198 191 allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
199 192 historypack.INDEXSUFFIX)
200 193 allhistorypacks = _topacks(packpath,
201 194 (f for f, mode, stat in allhistoryfiles),
202 195 historypack.historypack)
203 196 allhistorypacks.extend(s for s in historystore
204 197 if not isinstance(s, historypack.historypackstore))
205 198 _runrepack(repo,
206 199 contentstore.unioncontentstore(
207 200 *datapacks,
208 201 allowincomplete=allowincompletedata),
209 202 metadatastore.unionmetadatastore(
210 203 *historypacks,
211 204 allowincomplete=True),
212 205 packpath, category,
213 206 fullhistory=metadatastore.unionmetadatastore(
214 207 *allhistorypacks,
215 208 allowincomplete=True),
216 209 options=options)
217 210
218 211 def _computeincrementaldatapack(ui, files):
219 212 opts = {
220 213 'gencountlimit' : ui.configint(
221 214 'remotefilelog', 'data.gencountlimit'),
222 215 'generations' : ui.configlist(
223 216 'remotefilelog', 'data.generations'),
224 217 'maxrepackpacks' : ui.configint(
225 218 'remotefilelog', 'data.maxrepackpacks'),
226 219 'repackmaxpacksize' : ui.configbytes(
227 220 'remotefilelog', 'data.repackmaxpacksize'),
228 221 'repacksizelimit' : ui.configbytes(
229 222 'remotefilelog', 'data.repacksizelimit'),
230 223 }
231 224
232 225 packfiles = _allpackfileswithsuffix(
233 226 files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
234 227 return _computeincrementalpack(packfiles, opts)
235 228
236 229 def _computeincrementalhistorypack(ui, files):
237 230 opts = {
238 231 'gencountlimit' : ui.configint(
239 232 'remotefilelog', 'history.gencountlimit'),
240 233 'generations' : ui.configlist(
241 234 'remotefilelog', 'history.generations', ['100MB']),
242 235 'maxrepackpacks' : ui.configint(
243 236 'remotefilelog', 'history.maxrepackpacks'),
244 237 'repackmaxpacksize' : ui.configbytes(
245 238 'remotefilelog', 'history.repackmaxpacksize', '400MB'),
246 239 'repacksizelimit' : ui.configbytes(
247 240 'remotefilelog', 'history.repacksizelimit'),
248 241 }
249 242
250 243 packfiles = _allpackfileswithsuffix(
251 244 files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
252 245 return _computeincrementalpack(packfiles, opts)
253 246
254 247 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
255 248 result = []
256 249 fileset = set(fn for fn, mode, stat in files)
257 250 for filename, mode, stat in files:
258 251 if not filename.endswith(packsuffix):
259 252 continue
260 253
261 254 prefix = filename[:-len(packsuffix)]
262 255
263 256 # Don't process a pack if it doesn't have an index.
264 257 if (prefix + indexsuffix) not in fileset:
265 258 continue
266 259 result.append((prefix, mode, stat))
267 260
268 261 return result
269 262
270 263 def _computeincrementalpack(files, opts):
271 264 """Given a set of pack files along with the configuration options, this
272 265 function computes the list of files that should be packed as part of an
273 266 incremental repack.
274 267
275 268 It tries to strike a balance between keeping incremental repacks cheap (i.e.
276 269 packing small things when possible, and rolling the packs up to the big ones
277 270 over time).
278 271 """
279 272
280 273 limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
281 274 reverse=True))
282 275 limits.append(0)
283 276
284 277 # Group the packs by generation (i.e. by size)
285 278 generations = []
286 279 for i in pycompat.xrange(len(limits)):
287 280 generations.append([])
288 281
289 282 sizes = {}
290 283 for prefix, mode, stat in files:
291 284 size = stat.st_size
292 285 if size > opts['repackmaxpacksize']:
293 286 continue
294 287
295 288 sizes[prefix] = size
296 289 for i, limit in enumerate(limits):
297 290 if size > limit:
298 291 generations[i].append(prefix)
299 292 break
300 293
301 294 # Steps for picking what packs to repack:
302 295 # 1. Pick the largest generation with > gencountlimit pack files.
303 296 # 2. Take the smallest three packs.
304 297 # 3. While total-size-of-packs < repacksizelimit: add another pack
305 298
306 299 # Find the largest generation with more than gencountlimit packs
307 300 genpacks = []
308 301 for i, limit in enumerate(limits):
309 302 if len(generations[i]) > opts['gencountlimit']:
310 303 # Sort to be smallest last, for easy popping later
311 304 genpacks.extend(sorted(generations[i], reverse=True,
312 305 key=lambda x: sizes[x]))
313 306 break
314 307
315 308 # Take as many packs from the generation as we can
316 309 chosenpacks = genpacks[-3:]
317 310 genpacks = genpacks[:-3]
318 311 repacksize = sum(sizes[n] for n in chosenpacks)
319 312 while (repacksize < opts['repacksizelimit'] and genpacks and
320 313 len(chosenpacks) < opts['maxrepackpacks']):
321 314 chosenpacks.append(genpacks.pop())
322 315 repacksize += sizes[chosenpacks[-1]]
323 316
324 317 return chosenpacks
325 318
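# Standalone toy walk-through (hypothetical sizes and config values) of the
# generation bucketing performed by _computeincrementalpack() above: limits
# are sorted descending and each pack lands in the first generation whose
# size limit it exceeds.
limits = [1 << 30, 100 << 20, 1 << 20, 0]            # 1GB, 100MB, 1MB, catch-all
sizes = {'pack-a': 2 << 30, 'pack-b': 300 << 20,     # 2GB, 300MB
         'pack-c': 5 << 20, 'pack-d': 2 << 20}       # 5MB, 2MB
generations = [[] for _ in limits]
for name, size in sorted(sizes.items()):
    for i, limit in enumerate(limits):
        if size > limit:
            generations[i].append(name)
            break
# generations == [['pack-a'], ['pack-b'], ['pack-c', 'pack-d'], []]
# With gencountlimit = 2, no generation here holds more than two packs, so
# nothing is selected; once a generation exceeds the limit, its smallest
# packs are chosen until repacksizelimit or maxrepackpacks is reached.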
326 319 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
327 320 options=None):
328 321 shallowutil.mkstickygroupdir(repo.ui, packpath)
329 322
330 323 def isold(repo, filename, node):
331 324 """Check if the file node is older than a limit.
332 325 Unless a limit is specified in the config the default limit is taken.
333 326 """
334 327 filectx = repo.filectx(filename, fileid=node)
335 328 filetime = repo[filectx.linkrev()].date()
336 329
337 330 ttl = repo.ui.configint('remotefilelog', 'nodettl')
338 331
339 332 limit = time.time() - ttl
340 333 return filetime[0] < limit
341 334
342 335 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
343 336 if not fullhistory:
344 337 fullhistory = history
345 338 packer = repacker(repo, data, history, fullhistory, category,
346 339 gc=garbagecollect, isold=isold, options=options)
347 340
348 341 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
349 342 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
350 343 try:
351 344 packer.run(dpack, hpack)
352 345 except error.LockHeld:
353 346 raise RepackAlreadyRunning(_("skipping repack - another repack "
354 347 "is already running"))
355 348
356 349 def keepset(repo, keyfn, lastkeepkeys=None):
357 350 """Computes a keepset which is not garbage collected.
358 351 'keyfn' is a function that maps filename, node to a unique key.
359 352 'lastkeepkeys' is an optional argument and if provided the keepset
360 353 function updates lastkeepkeys with more keys and returns the result.
361 354 """
362 355 if not lastkeepkeys:
363 356 keepkeys = set()
364 357 else:
365 358 keepkeys = lastkeepkeys
366 359
367 360 # We want to keep:
368 361 # 1. Working copy parent
369 362 # 2. Draft commits
370 363 # 3. Parents of draft commits
371 364 # 4. Pullprefetch and bgprefetchrevs revsets if specified
372 365 revs = ['.', 'draft()', 'parents(draft())']
373 366 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
374 367 if prefetchrevs:
375 368 revs.append('(%s)' % prefetchrevs)
376 369 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
377 370 if prefetchrevs:
378 371 revs.append('(%s)' % prefetchrevs)
379 372 revs = '+'.join(revs)
380 373
381 374 revs = ['sort((%s), "topo")' % revs]
382 375 keep = scmutil.revrange(repo, revs)
383 376
384 377 processed = set()
385 378 lastmanifest = None
386 379
387 380 # process the commits in toposorted order starting from the oldest
388 381 for r in reversed(keep._list):
389 382 if repo[r].p1().rev() in processed:
390 383 # if the direct parent has already been processed
391 384 # then we only need to process the delta
392 385 m = repo[r].manifestctx().readdelta()
393 386 else:
394 387 # otherwise take the manifest and diff it
395 388 # with the previous manifest if one exists
396 389 if lastmanifest:
397 390 m = repo[r].manifest().diff(lastmanifest)
398 391 else:
399 392 m = repo[r].manifest()
400 393 lastmanifest = repo[r].manifest()
401 394 processed.add(r)
402 395
403 396 # populate keepkeys with keys from the current manifest
404 397 if type(m) is dict:
405 398 # m is a result of diff of two manifests and is a dictionary that
406 399 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
407 400 for filename, diff in m.iteritems():
408 401 if diff[0][0] is not None:
409 402 keepkeys.add(keyfn(filename, diff[0][0]))
410 403 else:
411 404 # m is a manifest object
412 405 for filename, filenode in m.iteritems():
413 406 keepkeys.add(keyfn(filename, filenode))
414 407
415 408 return keepkeys
416 409
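# Standalone sketch (hypothetical config value) of the revset keepset() above
# builds before evaluating it with scmutil.revrange(): working-copy parent,
# drafts, their parents, plus any configured prefetch revsets.
revs = ['.', 'draft()', 'parents(draft())']
pullprefetch = 'master'        # stand-in for remotefilelog.pullprefetch
if pullprefetch:
    revs.append('(%s)' % pullprefetch)
revset = 'sort((%s), "topo")' % '+'.join(revs)
# revset == 'sort((.+draft()+parents(draft())+(master)), "topo")'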
417 410 class repacker(object):
418 411 """Class for orchestrating the repack of data and history information into a
419 412 new format.
420 413 """
421 414 def __init__(self, repo, data, history, fullhistory, category, gc=False,
422 415 isold=None, options=None):
423 416 self.repo = repo
424 417 self.data = data
425 418 self.history = history
426 419 self.fullhistory = fullhistory
427 420 self.unit = constants.getunits(category)
428 421 self.garbagecollect = gc
429 422 self.options = options
430 423 if self.garbagecollect:
431 424 if not isold:
432 425 raise ValueError("Function 'isold' is not properly specified")
433 426 # use (filename, node) tuple as a keepset key
434 427 self.keepkeys = keepset(repo, lambda f, n : (f, n))
435 428 self.isold = isold
436 429
437 430 def run(self, targetdata, targethistory):
438 431 ledger = repackledger()
439 432
440 433 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
441 434 _('repacking %s') % self.repo.origroot, timeout=0):
442 435 self.repo.hook('prerepack')
443 436
444 437 # Populate ledger from source
445 438 self.data.markledger(ledger, options=self.options)
446 439 self.history.markledger(ledger, options=self.options)
447 440
448 441 # Run repack
449 442 self.repackdata(ledger, targetdata)
450 443 self.repackhistory(ledger, targethistory)
451 444
452 445 # Call cleanup on each source
453 446 for source in ledger.sources:
454 447 source.cleanup(ledger)
455 448
456 449 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
457 450 """Reorderes ``orphans`` into a single chain inside ``nodes`` and
458 451 ``deltabases``.
459 452
460 453 We often have orphan entries (nodes without a base that aren't
461 454 referenced by other nodes -- i.e., part of a chain) due to gaps in
462 455 history. Rather than store them as individual fulltexts, we prefer to
463 456 insert them as one chain sorted by size.
464 457 """
465 458 if not orphans:
466 459 return nodes
467 460
468 461 def getsize(node, default=0):
469 462 meta = self.data.getmeta(filename, node)
470 463 if constants.METAKEYSIZE in meta:
471 464 return meta[constants.METAKEYSIZE]
472 465 else:
473 466 return default
474 467
475 468 # Sort orphans by size; biggest first is preferred, since it's more
476 469 # likely to be the newest version assuming files grow over time.
477 470 # (Sort by node first to ensure the sort is stable.)
478 471 orphans = sorted(orphans)
479 472 orphans = list(sorted(orphans, key=getsize, reverse=True))
480 473 if ui.debugflag:
481 474 ui.debug("%s: orphan chain: %s\n" % (filename,
482 475 ", ".join([short(s) for s in orphans])))
483 476
484 477 # Create one contiguous chain and reassign deltabases.
485 478 for i, node in enumerate(orphans):
486 479 if i == 0:
487 480 deltabases[node] = (nullid, 0)
488 481 else:
489 482 parent = orphans[i - 1]
490 483 deltabases[node] = (parent, deltabases[parent][1] + 1)
491 484 nodes = filter(lambda node: node not in orphans, nodes)
492 485 nodes += orphans
493 486 return nodes
494 487
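# Standalone sketch (hypothetical nodes and sizes) of the orphan chaining in
# _chainorphans() above: orphans are sorted largest-first and linked into a
# single delta chain rooted at nullid.
nullid = b'\0' * 20                     # stand-in for mercurial.node.nullid
sizes = {b'n1': 10, b'n2': 40, b'n3': 25}
orphans = sorted(sorted(sizes), key=sizes.get, reverse=True)  # [n2, n3, n1]
deltabases = {}
for i, node in enumerate(orphans):
    if i == 0:
        deltabases[node] = (nullid, 0)
    else:
        parent = orphans[i - 1]
        deltabases[node] = (parent, deltabases[parent][1] + 1)
# deltabases == {b'n2': (nullid, 0), b'n3': (b'n2', 1), b'n1': (b'n3', 2)}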
495 488 def repackdata(self, ledger, target):
496 489 ui = self.repo.ui
497 490 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
498 491
499 492 byfile = {}
500 493 for entry in ledger.entries.itervalues():
501 494 if entry.datasource:
502 495 byfile.setdefault(entry.filename, {})[entry.node] = entry
503 496
504 497 count = 0
505 498 for filename, entries in sorted(byfile.iteritems()):
506 499 ui.progress(_("repacking data"), count, unit=self.unit,
507 500 total=len(byfile))
508 501
509 502 ancestors = {}
510 503 nodes = list(node for node in entries)
511 504 nohistory = []
512 505 for i, node in enumerate(nodes):
513 506 if node in ancestors:
514 507 continue
515 508 ui.progress(_("building history"), i, unit='nodes',
516 509 total=len(nodes))
517 510 try:
518 511 ancestors.update(self.fullhistory.getancestors(filename,
519 512 node, known=ancestors))
520 513 except KeyError:
521 514 # Since we're packing data entries, we may not have the
522 515 # corresponding history entries for them. It's not a big
523 516 # deal, but the entries won't be delta'd perfectly.
524 517 nohistory.append(node)
525 518 ui.progress(_("building history"), None)
526 519
527 520 # Order the nodes children first, so we can produce reverse deltas
528 521 orderednodes = list(reversed(self._toposort(ancestors)))
529 522 if len(nohistory) > 0:
530 523 ui.debug('repackdata: %d nodes without history\n' %
531 524 len(nohistory))
532 525 orderednodes.extend(sorted(nohistory))
533 526
534 527 # Filter orderednodes to just the nodes we want to serialize (it
535 528 # currently also has the edge nodes' ancestors).
536 529 orderednodes = filter(lambda node: node in nodes, orderednodes)
537 530
538 531 # Garbage collect old nodes:
539 532 if self.garbagecollect:
540 533 neworderednodes = []
541 534 for node in orderednodes:
542 535 # If the node is old and is not in the keepset, we skip it,
543 536 # and mark as garbage collected
544 537 if ((filename, node) not in self.keepkeys and
545 538 self.isold(self.repo, filename, node)):
546 539 entries[node].gced = True
547 540 continue
548 541 neworderednodes.append(node)
549 542 orderednodes = neworderednodes
550 543
551 544 # Compute delta bases for nodes:
552 545 deltabases = {}
553 546 nobase = set()
554 547 referenced = set()
555 548 nodes = set(nodes)
556 549 for i, node in enumerate(orderednodes):
557 550 ui.progress(_("processing nodes"), i, unit='nodes',
558 551 total=len(orderednodes))
559 552 # Find delta base
560 553 # TODO: allow delta'ing against most recent descendant instead
561 554 # of immediate child
562 555 deltatuple = deltabases.get(node, None)
563 556 if deltatuple is None:
564 557 deltabase, chainlen = nullid, 0
565 558 deltabases[node] = (nullid, 0)
566 559 nobase.add(node)
567 560 else:
568 561 deltabase, chainlen = deltatuple
569 562 referenced.add(deltabase)
570 563
571 564 # Use available ancestor information to inform our delta choices
572 565 ancestorinfo = ancestors.get(node)
573 566 if ancestorinfo:
574 567 p1, p2, linknode, copyfrom = ancestorinfo
575 568
576 569 # The presence of copyfrom means we're at a point where the
577 570 # file was copied from elsewhere. So don't attempt to do any
578 571 # deltas with the other file.
579 572 if copyfrom:
580 573 p1 = nullid
581 574
582 575 if chainlen < maxchainlen:
583 576 # Record this child as the delta base for its parents.
584 577 # This may be non optimal, since the parents may have
585 578 # many children, and this will only choose the last one.
586 579 # TODO: record all children and try all deltas to find
587 580 # best
588 581 if p1 != nullid:
589 582 deltabases[p1] = (node, chainlen + 1)
590 583 if p2 != nullid:
591 584 deltabases[p2] = (node, chainlen + 1)
592 585
593 586 # experimental config: repack.chainorphansbysize
594 587 if ui.configbool('repack', 'chainorphansbysize'):
595 588 orphans = nobase - referenced
596 589 orderednodes = self._chainorphans(ui, filename, orderednodes,
597 590 orphans, deltabases)
598 591
599 592 # Compute deltas and write to the pack
600 593 for i, node in enumerate(orderednodes):
601 594 deltabase, chainlen = deltabases[node]
602 595 # Compute delta
603 596 # TODO: Optimize the deltachain fetching. Since we're
604 597 # iterating over the different version of the file, we may
605 598 # be fetching the same deltachain over and over again.
606 599 meta = None
607 600 if deltabase != nullid:
608 601 deltaentry = self.data.getdelta(filename, node)
609 602 delta, deltabasename, origdeltabase, meta = deltaentry
610 603 size = meta.get(constants.METAKEYSIZE)
611 604 if (deltabasename != filename or origdeltabase != deltabase
612 605 or size is None):
613 606 deltabasetext = self.data.get(filename, deltabase)
614 607 original = self.data.get(filename, node)
615 608 size = len(original)
616 609 delta = mdiff.textdiff(deltabasetext, original)
617 610 else:
618 611 delta = self.data.get(filename, node)
619 612 size = len(delta)
620 613 meta = self.data.getmeta(filename, node)
621 614
622 615 # TODO: don't use the delta if it's larger than the fulltext
623 616 if constants.METAKEYSIZE not in meta:
624 617 meta[constants.METAKEYSIZE] = size
625 618 target.add(filename, node, deltabase, delta, meta)
626 619
627 620 entries[node].datarepacked = True
628 621
629 622 ui.progress(_("processing nodes"), None)
630 623 count += 1
631 624
632 625 ui.progress(_("repacking data"), None)
633 626 target.close(ledger=ledger)
634 627
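# Standalone sketch (hypothetical two-node history) of the reverse-delta
# bookkeeping in repackdata() above: nodes are visited children-first, each
# node's delta base defaults to nullid (a fulltext), and a node still within
# maxchainlen registers itself as the delta base for its parents.
nullid = b'\0' * 20                     # stand-in for mercurial.node.nullid
maxchainlen = 1000
ancestors = {b'child': (b'parent', nullid, None, None),
             b'parent': (nullid, nullid, None, None)}
orderednodes = [b'child', b'parent']    # children first
deltabases = {}
for node in orderednodes:
    deltabase, chainlen = deltabases.get(node, (nullid, 0))
    p1, p2, linknode, copyfrom = ancestors[node]
    if not copyfrom and chainlen < maxchainlen:
        if p1 != nullid:
            deltabases[p1] = (node, chainlen + 1)
        if p2 != nullid:
            deltabases[p2] = (node, chainlen + 1)
# deltabases == {b'parent': (b'child', 1)}: the parent is written as a delta
# against its child, while the child itself is stored as a fulltext.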
635 628 def repackhistory(self, ledger, target):
636 629 ui = self.repo.ui
637 630
638 631 byfile = {}
639 632 for entry in ledger.entries.itervalues():
640 633 if entry.historysource:
641 634 byfile.setdefault(entry.filename, {})[entry.node] = entry
642 635
643 636 count = 0
644 637 for filename, entries in sorted(byfile.iteritems()):
645 638 ancestors = {}
646 639 nodes = list(node for node in entries)
647 640
648 641 for node in nodes:
649 642 if node in ancestors:
650 643 continue
651 644 ancestors.update(self.history.getancestors(filename, node,
652 645 known=ancestors))
653 646
654 647 # Order the nodes children first
655 648 orderednodes = reversed(self._toposort(ancestors))
656 649
657 650 # Write to the pack
658 651 dontprocess = set()
659 652 for node in orderednodes:
660 653 p1, p2, linknode, copyfrom = ancestors[node]
661 654
662 655 # If the node is marked dontprocess, but it's also in the
663 656 # explicit entries set, that means the node exists both in this
664 657 # file and in another file that was copied to this file.
665 658 # Usually this happens if the file was copied to another file,
666 659 # then the copy was deleted, then reintroduced without copy
667 660 # metadata. The original add and the new add have the same hash
668 661 # since the content is identical and the parents are null.
669 662 if node in dontprocess and node not in entries:
670 663 # If copyfrom == filename, it means the copy history
671 664 # went to some other file, then came back to this one, so we
672 665 # should continue processing it.
673 666 if p1 != nullid and copyfrom != filename:
674 667 dontprocess.add(p1)
675 668 if p2 != nullid:
676 669 dontprocess.add(p2)
677 670 continue
678 671
679 672 if copyfrom:
680 673 dontprocess.add(p1)
681 674
682 675 target.add(filename, node, p1, p2, linknode, copyfrom)
683 676
684 677 if node in entries:
685 678 entries[node].historyrepacked = True
686 679
687 680 count += 1
688 681 ui.progress(_("repacking history"), count, unit=self.unit,
689 682 total=len(byfile))
690 683
691 684 ui.progress(_("repacking history"), None)
692 685 target.close(ledger=ledger)
693 686
694 687 def _toposort(self, ancestors):
695 688 def parentfunc(node):
696 689 p1, p2, linknode, copyfrom = ancestors[node]
697 690 parents = []
698 691 if p1 != nullid:
699 692 parents.append(p1)
700 693 if p2 != nullid:
701 694 parents.append(p2)
702 695 return parents
703 696
704 697 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
705 698 return sortednodes
706 699
707 700 class repackledger(object):
708 701 """Storage for all the bookkeeping that happens during a repack. It contains
709 702 the list of revisions being repacked, what happened to each revision, and
710 703 which source store contained which revision originally (for later cleanup).
711 704 """
712 705 def __init__(self):
713 706 self.entries = {}
714 707 self.sources = {}
715 708 self.created = set()
716 709
717 710 def markdataentry(self, source, filename, node):
718 711 """Mark the given filename+node revision as having a data rev in the
719 712 given source.
720 713 """
721 714 entry = self._getorcreateentry(filename, node)
722 715 entry.datasource = True
723 716 entries = self.sources.get(source)
724 717 if not entries:
725 718 entries = set()
726 719 self.sources[source] = entries
727 720 entries.add(entry)
728 721
729 722 def markhistoryentry(self, source, filename, node):
730 723 """Mark the given filename+node revision as having a history rev in the
731 724 given source.
732 725 """
733 726 entry = self._getorcreateentry(filename, node)
734 727 entry.historysource = True
735 728 entries = self.sources.get(source)
736 729 if not entries:
737 730 entries = set()
738 731 self.sources[source] = entries
739 732 entries.add(entry)
740 733
741 734 def _getorcreateentry(self, filename, node):
742 735 key = (filename, node)
743 736 value = self.entries.get(key)
744 737 if not value:
745 738 value = repackentry(filename, node)
746 739 self.entries[key] = value
747 740
748 741 return value
749 742
750 743 def addcreated(self, value):
751 744 self.created.add(value)
752 745
753 746 class repackentry(object):
754 747 """Simple class representing a single revision entry in the repackledger.
755 748 """
756 749 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
757 750 r'datarepacked', r'historyrepacked', r'gced')
758 751 def __init__(self, filename, node):
759 752 self.filename = filename
760 753 self.node = node
761 754 # If the revision has a data entry in the source
762 755 self.datasource = False
763 756 # If the revision has a history entry in the source
764 757 self.historysource = False
765 758 # If the revision's data entry was repacked into the repack target
766 759 self.datarepacked = False
767 760 # If the revision's history entry was repacked into the repack target
768 761 self.historyrepacked = False
769 762 # If garbage collected
770 763 self.gced = False
771 764
772 765 def repacklockvfs(repo):
773 766 if util.safehasattr(repo, 'name'):
774 767 # Lock in the shared cache so repacks across multiple copies of the same
775 768 # repo are coordinated.
776 769 sharedcachepath = shallowutil.getcachepackpath(
777 770 repo,
778 771 constants.FILEPACK_CATEGORY)
779 772 return vfs.vfs(sharedcachepath)
780 773 else:
781 774 return repo.svfs