py3: listify filter() to call len() on it...
Pulkit Goyal -
r40840:bad0053e default
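The fix below replaces a bare filter() call with list(filter(...)): on Python 2, filter() returns a list, but on Python 3 it returns a lazy iterator with no length, so the later len(orderednodes) call would raise TypeError. A minimal standalone sketch of the difference (hypothetical names, not part of the changeset itself):

    nodes = ['a', 'b', 'c', 'd']
    wanted = set(['a', 'c'])

    # Python 2: filter() returns a list, so len() works on the result.
    # Python 3: filter() returns an iterator; len() raises TypeError.
    lazy = filter(lambda node: node in wanted, nodes)

    # Portable form used by this change: materialize the result first.
    orderednodes = list(filter(lambda node: node in wanted, nodes))
    assert len(orderednodes) == 2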
@@ -1,774 +1,775 @@
1 1 from __future__ import absolute_import
2 2
3 3 import os
4 4 import time
5 5
6 6 from mercurial.i18n import _
7 7 from mercurial.node import (
8 8 nullid,
9 9 short,
10 10 )
11 11 from mercurial import (
12 12 encoding,
13 13 error,
14 14 mdiff,
15 15 policy,
16 16 pycompat,
17 17 scmutil,
18 18 util,
19 19 vfs,
20 20 )
21 21 from mercurial.utils import procutil
22 22 from . import (
23 23 constants,
24 24 contentstore,
25 25 datapack,
26 26 extutil,
27 27 historypack,
28 28 metadatastore,
29 29 shallowutil,
30 30 )
31 31
32 32 osutil = policy.importmod(r'osutil')
33 33
34 34 class RepackAlreadyRunning(error.Abort):
35 35 pass
36 36
37 37 def backgroundrepack(repo, incremental=True, packsonly=False):
38 38 cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
39 39 msg = _("(running background repack)\n")
40 40 if incremental:
41 41 cmd.append('--incremental')
42 42 msg = _("(running background incremental repack)\n")
43 43 if packsonly:
44 44 cmd.append('--packsonly')
45 45 repo.ui.warn(msg)
46 46 procutil.runbgcommand(cmd, encoding.environ)
47 47
48 48 def fullrepack(repo, options=None):
49 49 """If ``packsonly`` is True, stores creating only loose objects are skipped.
50 50 """
51 51 if util.safehasattr(repo, 'shareddatastores'):
52 52 datasource = contentstore.unioncontentstore(
53 53 *repo.shareddatastores)
54 54 historysource = metadatastore.unionmetadatastore(
55 55 *repo.sharedhistorystores,
56 56 allowincomplete=True)
57 57
58 58 packpath = shallowutil.getcachepackpath(
59 59 repo,
60 60 constants.FILEPACK_CATEGORY)
61 61 _runrepack(repo, datasource, historysource, packpath,
62 62 constants.FILEPACK_CATEGORY, options=options)
63 63
64 64 if util.safehasattr(repo.manifestlog, 'datastore'):
65 65 localdata, shareddata = _getmanifeststores(repo)
66 66 lpackpath, ldstores, lhstores = localdata
67 67 spackpath, sdstores, shstores = shareddata
68 68
69 69 # Repack the shared manifest store
70 70 datasource = contentstore.unioncontentstore(*sdstores)
71 71 historysource = metadatastore.unionmetadatastore(
72 72 *shstores,
73 73 allowincomplete=True)
74 74 _runrepack(repo, datasource, historysource, spackpath,
75 75 constants.TREEPACK_CATEGORY, options=options)
76 76
77 77 # Repack the local manifest store
78 78 datasource = contentstore.unioncontentstore(
79 79 *ldstores,
80 80 allowincomplete=True)
81 81 historysource = metadatastore.unionmetadatastore(
82 82 *lhstores,
83 83 allowincomplete=True)
84 84 _runrepack(repo, datasource, historysource, lpackpath,
85 85 constants.TREEPACK_CATEGORY, options=options)
86 86
87 87 def incrementalrepack(repo, options=None):
88 88 """This repacks the repo by looking at the distribution of pack files in the
89 89 repo and performing the smallest repack needed to keep the repo in good shape.
90 90 """
91 91 if util.safehasattr(repo, 'shareddatastores'):
92 92 packpath = shallowutil.getcachepackpath(
93 93 repo,
94 94 constants.FILEPACK_CATEGORY)
95 95 _incrementalrepack(repo,
96 96 repo.shareddatastores,
97 97 repo.sharedhistorystores,
98 98 packpath,
99 99 constants.FILEPACK_CATEGORY,
100 100 options=options)
101 101
102 102 if util.safehasattr(repo.manifestlog, 'datastore'):
103 103 localdata, shareddata = _getmanifeststores(repo)
104 104 lpackpath, ldstores, lhstores = localdata
105 105 spackpath, sdstores, shstores = shareddata
106 106
107 107 # Repack the shared manifest store
108 108 _incrementalrepack(repo,
109 109 sdstores,
110 110 shstores,
111 111 spackpath,
112 112 constants.TREEPACK_CATEGORY,
113 113 options=options)
114 114
115 115 # Repack the local manifest store
116 116 _incrementalrepack(repo,
117 117 ldstores,
118 118 lhstores,
119 119 lpackpath,
120 120 constants.TREEPACK_CATEGORY,
121 121 allowincompletedata=True,
122 122 options=options)
123 123
124 124 def _getmanifeststores(repo):
125 125 shareddatastores = repo.manifestlog.shareddatastores
126 126 localdatastores = repo.manifestlog.localdatastores
127 127 sharedhistorystores = repo.manifestlog.sharedhistorystores
128 128 localhistorystores = repo.manifestlog.localhistorystores
129 129
130 130 sharedpackpath = shallowutil.getcachepackpath(repo,
131 131 constants.TREEPACK_CATEGORY)
132 132 localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
133 133 constants.TREEPACK_CATEGORY)
134 134
135 135 return ((localpackpath, localdatastores, localhistorystores),
136 136 (sharedpackpath, shareddatastores, sharedhistorystores))
137 137
138 138 def _topacks(packpath, files, constructor):
139 139 paths = list(os.path.join(packpath, p) for p in files)
140 140 packs = list(constructor(p) for p in paths)
141 141 return packs
142 142
143 143 def _deletebigpacks(repo, folder, files):
144 144 """Deletes packfiles that are bigger than ``packs.maxpacksize``.
145 145
146 146 Returns ``files`` with the removed files omitted."""
147 147 maxsize = repo.ui.configbytes("packs", "maxpacksize")
148 148 if maxsize <= 0:
149 149 return files
150 150
151 151 # This only considers datapacks today, but we could broaden it to include
152 152 # historypacks.
153 153 VALIDEXTS = [".datapack", ".dataidx"]
154 154
155 155 # Either an oversize index or datapack will trigger cleanup of the whole
156 156 # pack:
157 157 oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
158 158 if (stat.st_size > maxsize and (os.path.splitext(path)[1]
159 159 in VALIDEXTS))])
160 160
161 161 for rootfname in oversized:
162 162 rootpath = os.path.join(folder, rootfname)
163 163 for ext in VALIDEXTS:
164 164 path = rootpath + ext
165 165 repo.ui.debug('removing oversize packfile %s (%s)\n' %
166 166 (path, util.bytecount(os.stat(path).st_size)))
167 167 os.unlink(path)
168 168 return [row for row in files if os.path.basename(row[0]) not in oversized]
169 169
170 170 def _incrementalrepack(repo, datastore, historystore, packpath, category,
171 171 allowincompletedata=False, options=None):
172 172 shallowutil.mkstickygroupdir(repo.ui, packpath)
173 173
174 174 files = osutil.listdir(packpath, stat=True)
175 175 files = _deletebigpacks(repo, packpath, files)
176 176 datapacks = _topacks(packpath,
177 177 _computeincrementaldatapack(repo.ui, files),
178 178 datapack.datapack)
179 179 datapacks.extend(s for s in datastore
180 180 if not isinstance(s, datapack.datapackstore))
181 181
182 182 historypacks = _topacks(packpath,
183 183 _computeincrementalhistorypack(repo.ui, files),
184 184 historypack.historypack)
185 185 historypacks.extend(s for s in historystore
186 186 if not isinstance(s, historypack.historypackstore))
187 187
188 188 # ``allhistory{files,packs}`` contains all known history packs, even ones we
189 189 # don't plan to repack. They are used during the datapack repack to ensure
190 190 # good ordering of nodes.
191 191 allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
192 192 historypack.INDEXSUFFIX)
193 193 allhistorypacks = _topacks(packpath,
194 194 (f for f, mode, stat in allhistoryfiles),
195 195 historypack.historypack)
196 196 allhistorypacks.extend(s for s in historystore
197 197 if not isinstance(s, historypack.historypackstore))
198 198 _runrepack(repo,
199 199 contentstore.unioncontentstore(
200 200 *datapacks,
201 201 allowincomplete=allowincompletedata),
202 202 metadatastore.unionmetadatastore(
203 203 *historypacks,
204 204 allowincomplete=True),
205 205 packpath, category,
206 206 fullhistory=metadatastore.unionmetadatastore(
207 207 *allhistorypacks,
208 208 allowincomplete=True),
209 209 options=options)
210 210
211 211 def _computeincrementaldatapack(ui, files):
212 212 opts = {
213 213 'gencountlimit' : ui.configint(
214 214 'remotefilelog', 'data.gencountlimit'),
215 215 'generations' : ui.configlist(
216 216 'remotefilelog', 'data.generations'),
217 217 'maxrepackpacks' : ui.configint(
218 218 'remotefilelog', 'data.maxrepackpacks'),
219 219 'repackmaxpacksize' : ui.configbytes(
220 220 'remotefilelog', 'data.repackmaxpacksize'),
221 221 'repacksizelimit' : ui.configbytes(
222 222 'remotefilelog', 'data.repacksizelimit'),
223 223 }
224 224
225 225 packfiles = _allpackfileswithsuffix(
226 226 files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
227 227 return _computeincrementalpack(packfiles, opts)
228 228
229 229 def _computeincrementalhistorypack(ui, files):
230 230 opts = {
231 231 'gencountlimit' : ui.configint(
232 232 'remotefilelog', 'history.gencountlimit'),
233 233 'generations' : ui.configlist(
234 234 'remotefilelog', 'history.generations', ['100MB']),
235 235 'maxrepackpacks' : ui.configint(
236 236 'remotefilelog', 'history.maxrepackpacks'),
237 237 'repackmaxpacksize' : ui.configbytes(
238 238 'remotefilelog', 'history.repackmaxpacksize', '400MB'),
239 239 'repacksizelimit' : ui.configbytes(
240 240 'remotefilelog', 'history.repacksizelimit'),
241 241 }
242 242
243 243 packfiles = _allpackfileswithsuffix(
244 244 files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
245 245 return _computeincrementalpack(packfiles, opts)
246 246
247 247 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
248 248 result = []
249 249 fileset = set(fn for fn, mode, stat in files)
250 250 for filename, mode, stat in files:
251 251 if not filename.endswith(packsuffix):
252 252 continue
253 253
254 254 prefix = filename[:-len(packsuffix)]
255 255
256 256 # Don't process a pack if it doesn't have an index.
257 257 if (prefix + indexsuffix) not in fileset:
258 258 continue
259 259 result.append((prefix, mode, stat))
260 260
261 261 return result
262 262
263 263 def _computeincrementalpack(files, opts):
264 264 """Given a set of pack files along with the configuration options, this
265 265 function computes the list of files that should be packed as part of an
266 266 incremental repack.
267 267
268 268 It tries to strike a balance between keeping incremental repacks cheap
269 269 (i.e. packing small things when possible) and rolling the packs up to the
270 270 big ones over time.
271 271 """
272 272
273 273 limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
274 274 reverse=True))
275 275 limits.append(0)
276 276
277 277 # Group the packs by generation (i.e. by size)
278 278 generations = []
279 279 for i in pycompat.xrange(len(limits)):
280 280 generations.append([])
281 281
282 282 sizes = {}
283 283 for prefix, mode, stat in files:
284 284 size = stat.st_size
285 285 if size > opts['repackmaxpacksize']:
286 286 continue
287 287
288 288 sizes[prefix] = size
289 289 for i, limit in enumerate(limits):
290 290 if size > limit:
291 291 generations[i].append(prefix)
292 292 break
293 293
294 294 # Steps for picking what packs to repack:
295 295 # 1. Pick the largest generation with > gencountlimit pack files.
296 296 # 2. Take the smallest three packs.
297 297 # 3. While total-size-of-packs < repacksizelimit: add another pack
298 298
299 299 # Find the largest generation with more than gencountlimit packs
300 300 genpacks = []
301 301 for i, limit in enumerate(limits):
302 302 if len(generations[i]) > opts['gencountlimit']:
303 303 # Sort to be smallest last, for easy popping later
304 304 genpacks.extend(sorted(generations[i], reverse=True,
305 305 key=lambda x: sizes[x]))
306 306 break
307 307
308 308 # Take as many packs from the generation as we can
309 309 chosenpacks = genpacks[-3:]
310 310 genpacks = genpacks[:-3]
311 311 repacksize = sum(sizes[n] for n in chosenpacks)
312 312 while (repacksize < opts['repacksizelimit'] and genpacks and
313 313 len(chosenpacks) < opts['maxrepackpacks']):
314 314 chosenpacks.append(genpacks.pop())
315 315 repacksize += sizes[chosenpacks[-1]]
316 316
317 317 return chosenpacks
318 318
319 319 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
320 320 options=None):
321 321 shallowutil.mkstickygroupdir(repo.ui, packpath)
322 322
323 323 def isold(repo, filename, node):
324 324 """Check if the file node is older than a limit.
325 325 Unless a limit is specified in the config the default limit is taken.
326 326 """
327 327 filectx = repo.filectx(filename, fileid=node)
328 328 filetime = repo[filectx.linkrev()].date()
329 329
330 330 ttl = repo.ui.configint('remotefilelog', 'nodettl')
331 331
332 332 limit = time.time() - ttl
333 333 return filetime[0] < limit
334 334
335 335 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
336 336 if not fullhistory:
337 337 fullhistory = history
338 338 packer = repacker(repo, data, history, fullhistory, category,
339 339 gc=garbagecollect, isold=isold, options=options)
340 340
341 341 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
342 342 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
343 343 try:
344 344 packer.run(dpack, hpack)
345 345 except error.LockHeld:
346 346 raise RepackAlreadyRunning(_("skipping repack - another repack "
347 347 "is already running"))
348 348
349 349 def keepset(repo, keyfn, lastkeepkeys=None):
350 350 """Computes a keepset which is not garbage collected.
351 351 'keyfn' is a function that maps filename, node to a unique key.
352 352 'lastkeepkeys' is an optional argument and if provided the keepset
353 353 function updates lastkeepkeys with more keys and returns the result.
354 354 """
355 355 if not lastkeepkeys:
356 356 keepkeys = set()
357 357 else:
358 358 keepkeys = lastkeepkeys
359 359
360 360 # We want to keep:
361 361 # 1. Working copy parent
362 362 # 2. Draft commits
363 363 # 3. Parents of draft commits
364 364 # 4. Pullprefetch and bgprefetchrevs revsets if specified
365 365 revs = ['.', 'draft()', 'parents(draft())']
366 366 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
367 367 if prefetchrevs:
368 368 revs.append('(%s)' % prefetchrevs)
369 369 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
370 370 if prefetchrevs:
371 371 revs.append('(%s)' % prefetchrevs)
372 372 revs = '+'.join(revs)
373 373
374 374 revs = ['sort((%s), "topo")' % revs]
375 375 keep = scmutil.revrange(repo, revs)
376 376
377 377 processed = set()
378 378 lastmanifest = None
379 379
380 380 # process the commits in toposorted order starting from the oldest
381 381 for r in reversed(keep._list):
382 382 if repo[r].p1().rev() in processed:
383 383 # if the direct parent has already been processed
384 384 # then we only need to process the delta
385 385 m = repo[r].manifestctx().readdelta()
386 386 else:
387 387 # otherwise take the manifest and diff it
388 388 # with the previous manifest if one exists
389 389 if lastmanifest:
390 390 m = repo[r].manifest().diff(lastmanifest)
391 391 else:
392 392 m = repo[r].manifest()
393 393 lastmanifest = repo[r].manifest()
394 394 processed.add(r)
395 395
396 396 # populate keepkeys with keys from the current manifest
397 397 if type(m) is dict:
398 398 # m is the result of diffing two manifests and is a dictionary that
399 399 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
400 400 for filename, diff in m.iteritems():
401 401 if diff[0][0] is not None:
402 402 keepkeys.add(keyfn(filename, diff[0][0]))
403 403 else:
404 404 # m is a manifest object
405 405 for filename, filenode in m.iteritems():
406 406 keepkeys.add(keyfn(filename, filenode))
407 407
408 408 return keepkeys
409 409
410 410 class repacker(object):
411 411 """Class for orchestrating the repack of data and history information into a
412 412 new format.
413 413 """
414 414 def __init__(self, repo, data, history, fullhistory, category, gc=False,
415 415 isold=None, options=None):
416 416 self.repo = repo
417 417 self.data = data
418 418 self.history = history
419 419 self.fullhistory = fullhistory
420 420 self.unit = constants.getunits(category)
421 421 self.garbagecollect = gc
422 422 self.options = options
423 423 if self.garbagecollect:
424 424 if not isold:
425 425 raise ValueError("Function 'isold' is not properly specified")
426 426 # use (filename, node) tuple as a keepset key
427 427 self.keepkeys = keepset(repo, lambda f, n : (f, n))
428 428 self.isold = isold
429 429
430 430 def run(self, targetdata, targethistory):
431 431 ledger = repackledger()
432 432
433 433 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
434 434 _('repacking %s') % self.repo.origroot, timeout=0):
435 435 self.repo.hook('prerepack')
436 436
437 437 # Populate ledger from source
438 438 self.data.markledger(ledger, options=self.options)
439 439 self.history.markledger(ledger, options=self.options)
440 440
441 441 # Run repack
442 442 self.repackdata(ledger, targetdata)
443 443 self.repackhistory(ledger, targethistory)
444 444
445 445 # Call cleanup on each source
446 446 for source in ledger.sources:
447 447 source.cleanup(ledger)
448 448
449 449 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
450 450 """Reorderes ``orphans`` into a single chain inside ``nodes`` and
451 451 ``deltabases``.
452 452
453 453 We often have orphan entries (nodes without a base that aren't
454 454 referenced by other nodes -- i.e., not part of a chain) due to gaps in
455 455 history. Rather than store them as individual fulltexts, we prefer to
456 456 insert them as one chain sorted by size.
457 457 """
458 458 if not orphans:
459 459 return nodes
460 460
461 461 def getsize(node, default=0):
462 462 meta = self.data.getmeta(filename, node)
463 463 if constants.METAKEYSIZE in meta:
464 464 return meta[constants.METAKEYSIZE]
465 465 else:
466 466 return default
467 467
468 468 # Sort orphans by size; biggest first is preferred, since it's more
469 469 # likely to be the newest version assuming files grow over time.
470 470 # (Sort by node first to ensure the sort is stable.)
471 471 orphans = sorted(orphans)
472 472 orphans = list(sorted(orphans, key=getsize, reverse=True))
473 473 if ui.debugflag:
474 474 ui.debug("%s: orphan chain: %s\n" % (filename,
475 475 ", ".join([short(s) for s in orphans])))
476 476
477 477 # Create one contiguous chain and reassign deltabases.
478 478 for i, node in enumerate(orphans):
479 479 if i == 0:
480 480 deltabases[node] = (nullid, 0)
481 481 else:
482 482 parent = orphans[i - 1]
483 483 deltabases[node] = (parent, deltabases[parent][1] + 1)
484 484 nodes = filter(lambda node: node not in orphans, nodes)
485 485 nodes += orphans
486 486 return nodes
487 487
488 488 def repackdata(self, ledger, target):
489 489 ui = self.repo.ui
490 490 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
491 491
492 492 byfile = {}
493 493 for entry in ledger.entries.itervalues():
494 494 if entry.datasource:
495 495 byfile.setdefault(entry.filename, {})[entry.node] = entry
496 496
497 497 count = 0
498 498 for filename, entries in sorted(byfile.iteritems()):
499 499 ui.progress(_("repacking data"), count, unit=self.unit,
500 500 total=len(byfile))
501 501
502 502 ancestors = {}
503 503 nodes = list(node for node in entries)
504 504 nohistory = []
505 505 for i, node in enumerate(nodes):
506 506 if node in ancestors:
507 507 continue
508 508 ui.progress(_("building history"), i, unit='nodes',
509 509 total=len(nodes))
510 510 try:
511 511 ancestors.update(self.fullhistory.getancestors(filename,
512 512 node, known=ancestors))
513 513 except KeyError:
514 514 # Since we're packing data entries, we may not have the
515 515 # corresponding history entries for them. It's not a big
516 516 # deal, but the entries won't be delta'd perfectly.
517 517 nohistory.append(node)
518 518 ui.progress(_("building history"), None)
519 519
520 520 # Order the nodes children-first, so we can produce reverse deltas
521 521 orderednodes = list(reversed(self._toposort(ancestors)))
522 522 if len(nohistory) > 0:
523 523 ui.debug('repackdata: %d nodes without history\n' %
524 524 len(nohistory))
525 525 orderednodes.extend(sorted(nohistory))
526 526
527 527 # Filter orderednodes to just the nodes we want to serialize (it
528 528 # currently also has the edge nodes' ancestors).
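        # filter() returns a lazy iterator on Python 3; the result is
        # materialized with list() below since len(orderednodes) is needed later.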
529 orderednodes = filter(lambda node: node in nodes, orderednodes)
529 orderednodes = list(filter(lambda node: node in nodes,
530 orderednodes))
530 531
531 532 # Garbage collect old nodes:
532 533 if self.garbagecollect:
533 534 neworderednodes = []
534 535 for node in orderednodes:
535 536 # If the node is old and is not in the keepset, we skip it,
536 537 # and mark as garbage collected
537 538 if ((filename, node) not in self.keepkeys and
538 539 self.isold(self.repo, filename, node)):
539 540 entries[node].gced = True
540 541 continue
541 542 neworderednodes.append(node)
542 543 orderednodes = neworderednodes
543 544
544 545 # Compute delta bases for nodes:
545 546 deltabases = {}
546 547 nobase = set()
547 548 referenced = set()
548 549 nodes = set(nodes)
549 550 for i, node in enumerate(orderednodes):
550 551 ui.progress(_("processing nodes"), i, unit='nodes',
551 552 total=len(orderednodes))
552 553 # Find delta base
553 554 # TODO: allow delta'ing against most recent descendant instead
554 555 # of immediate child
555 556 deltatuple = deltabases.get(node, None)
556 557 if deltatuple is None:
557 558 deltabase, chainlen = nullid, 0
558 559 deltabases[node] = (nullid, 0)
559 560 nobase.add(node)
560 561 else:
561 562 deltabase, chainlen = deltatuple
562 563 referenced.add(deltabase)
563 564
564 565 # Use available ancestor information to inform our delta choices
565 566 ancestorinfo = ancestors.get(node)
566 567 if ancestorinfo:
567 568 p1, p2, linknode, copyfrom = ancestorinfo
568 569
569 570 # The presence of copyfrom means we're at a point where the
570 571 # file was copied from elsewhere. So don't attempt to do any
571 572 # deltas with the other file.
572 573 if copyfrom:
573 574 p1 = nullid
574 575
575 576 if chainlen < maxchainlen:
576 577 # Record this child as the delta base for its parents.
577 578 # This may be non-optimal, since the parents may have
578 579 # many children, and this will only choose the last one.
579 580 # TODO: record all children and try all deltas to find
580 581 # best
581 582 if p1 != nullid:
582 583 deltabases[p1] = (node, chainlen + 1)
583 584 if p2 != nullid:
584 585 deltabases[p2] = (node, chainlen + 1)
585 586
586 587 # experimental config: repack.chainorphansbysize
587 588 if ui.configbool('repack', 'chainorphansbysize'):
588 589 orphans = nobase - referenced
589 590 orderednodes = self._chainorphans(ui, filename, orderednodes,
590 591 orphans, deltabases)
591 592
592 593 # Compute deltas and write to the pack
593 594 for i, node in enumerate(orderednodes):
594 595 deltabase, chainlen = deltabases[node]
595 596 # Compute delta
596 597 # TODO: Optimize the deltachain fetching. Since we're
597 598 # iterating over the different version of the file, we may
598 599 # be fetching the same deltachain over and over again.
599 600 meta = None
600 601 if deltabase != nullid:
601 602 deltaentry = self.data.getdelta(filename, node)
602 603 delta, deltabasename, origdeltabase, meta = deltaentry
603 604 size = meta.get(constants.METAKEYSIZE)
604 605 if (deltabasename != filename or origdeltabase != deltabase
605 606 or size is None):
606 607 deltabasetext = self.data.get(filename, deltabase)
607 608 original = self.data.get(filename, node)
608 609 size = len(original)
609 610 delta = mdiff.textdiff(deltabasetext, original)
610 611 else:
611 612 delta = self.data.get(filename, node)
612 613 size = len(delta)
613 614 meta = self.data.getmeta(filename, node)
614 615
615 616 # TODO: don't use the delta if it's larger than the fulltext
616 617 if constants.METAKEYSIZE not in meta:
617 618 meta[constants.METAKEYSIZE] = size
618 619 target.add(filename, node, deltabase, delta, meta)
619 620
620 621 entries[node].datarepacked = True
621 622
622 623 ui.progress(_("processing nodes"), None)
623 624 count += 1
624 625
625 626 ui.progress(_("repacking data"), None)
626 627 target.close(ledger=ledger)
627 628
628 629 def repackhistory(self, ledger, target):
629 630 ui = self.repo.ui
630 631
631 632 byfile = {}
632 633 for entry in ledger.entries.itervalues():
633 634 if entry.historysource:
634 635 byfile.setdefault(entry.filename, {})[entry.node] = entry
635 636
636 637 count = 0
637 638 for filename, entries in sorted(byfile.iteritems()):
638 639 ancestors = {}
639 640 nodes = list(node for node in entries)
640 641
641 642 for node in nodes:
642 643 if node in ancestors:
643 644 continue
644 645 ancestors.update(self.history.getancestors(filename, node,
645 646 known=ancestors))
646 647
647 648 # Order the nodes children-first
648 649 orderednodes = reversed(self._toposort(ancestors))
649 650
650 651 # Write to the pack
651 652 dontprocess = set()
652 653 for node in orderednodes:
653 654 p1, p2, linknode, copyfrom = ancestors[node]
654 655
655 656 # If the node is marked dontprocess, but it's also in the
656 657 # explicit entries set, that means the node exists both in this
657 658 # file and in another file that was copied to this file.
658 659 # Usually this happens if the file was copied to another file,
659 660 # then the copy was deleted, then reintroduced without copy
660 661 # metadata. The original add and the new add have the same hash
661 662 # since the content is identical and the parents are null.
662 663 if node in dontprocess and node not in entries:
663 664 # If copyfrom == filename, it means the copy history
664 665 # went to some other file, then came back to this one, so we
665 666 # should continue processing it.
666 667 if p1 != nullid and copyfrom != filename:
667 668 dontprocess.add(p1)
668 669 if p2 != nullid:
669 670 dontprocess.add(p2)
670 671 continue
671 672
672 673 if copyfrom:
673 674 dontprocess.add(p1)
674 675
675 676 target.add(filename, node, p1, p2, linknode, copyfrom)
676 677
677 678 if node in entries:
678 679 entries[node].historyrepacked = True
679 680
680 681 count += 1
681 682 ui.progress(_("repacking history"), count, unit=self.unit,
682 683 total=len(byfile))
683 684
684 685 ui.progress(_("repacking history"), None)
685 686 target.close(ledger=ledger)
686 687
687 688 def _toposort(self, ancestors):
688 689 def parentfunc(node):
689 690 p1, p2, linknode, copyfrom = ancestors[node]
690 691 parents = []
691 692 if p1 != nullid:
692 693 parents.append(p1)
693 694 if p2 != nullid:
694 695 parents.append(p2)
695 696 return parents
696 697
697 698 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
698 699 return sortednodes
699 700
700 701 class repackledger(object):
701 702 """Storage for all the bookkeeping that happens during a repack. It contains
702 703 the list of revisions being repacked, what happened to each revision, and
703 704 which source store contained which revision originally (for later cleanup).
704 705 """
705 706 def __init__(self):
706 707 self.entries = {}
707 708 self.sources = {}
708 709 self.created = set()
709 710
710 711 def markdataentry(self, source, filename, node):
711 712 """Mark the given filename+node revision as having a data rev in the
712 713 given source.
713 714 """
714 715 entry = self._getorcreateentry(filename, node)
715 716 entry.datasource = True
716 717 entries = self.sources.get(source)
717 718 if not entries:
718 719 entries = set()
719 720 self.sources[source] = entries
720 721 entries.add(entry)
721 722
722 723 def markhistoryentry(self, source, filename, node):
723 724 """Mark the given filename+node revision as having a history rev in the
724 725 given source.
725 726 """
726 727 entry = self._getorcreateentry(filename, node)
727 728 entry.historysource = True
728 729 entries = self.sources.get(source)
729 730 if not entries:
730 731 entries = set()
731 732 self.sources[source] = entries
732 733 entries.add(entry)
733 734
734 735 def _getorcreateentry(self, filename, node):
735 736 key = (filename, node)
736 737 value = self.entries.get(key)
737 738 if not value:
738 739 value = repackentry(filename, node)
739 740 self.entries[key] = value
740 741
741 742 return value
742 743
743 744 def addcreated(self, value):
744 745 self.created.add(value)
745 746
746 747 class repackentry(object):
747 748 """Simple class representing a single revision entry in the repackledger.
748 749 """
749 750 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
750 751 r'datarepacked', r'historyrepacked', r'gced')
751 752 def __init__(self, filename, node):
752 753 self.filename = filename
753 754 self.node = node
754 755 # If the revision has a data entry in the source
755 756 self.datasource = False
756 757 # If the revision has a history entry in the source
757 758 self.historysource = False
758 759 # If the revision's data entry was repacked into the repack target
759 760 self.datarepacked = False
760 761 # If the revision's history entry was repacked into the repack target
761 762 self.historyrepacked = False
762 763 # If garbage collected
763 764 self.gced = False
764 765
765 766 def repacklockvfs(repo):
766 767 if util.safehasattr(repo, 'name'):
767 768 # Lock in the shared cache so repacks across multiple copies of the same
768 769 # repo are coordinated.
769 770 sharedcachepath = shallowutil.getcachepackpath(
770 771 repo,
771 772 constants.FILEPACK_CATEGORY)
772 773 return vfs.vfs(sharedcachepath)
773 774 else:
774 775 return repo.svfs
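For reference, the selection policy in _computeincrementalpack above can be exercised in isolation. The sketch below restates the same steps (bucket packs into size generations, pick the largest generation with more than gencountlimit packs, start from the three smallest, then keep adding while under repacksizelimit and maxrepackpacks) using a hypothetical choosepacks helper and made-up pack sizes; it omits the repackmaxpacksize filter and the ui config plumbing:

    def choosepacks(sizes, genlimits, gencountlimit, repacksizelimit,
                    maxrepackpacks):
        # Generation boundaries, largest first, with a catch-all 0 at the end.
        limits = sorted(genlimits, reverse=True) + [0]

        # Bucket packs into generations by size.
        generations = [[] for _ in limits]
        for name, size in sizes.items():
            for i, limit in enumerate(limits):
                if size > limit:
                    generations[i].append(name)
                    break

        # Pick the largest generation with more than gencountlimit packs,
        # sorted so the smallest packs end up last (easy to pop).
        genpacks = []
        for i in range(len(limits)):
            if len(generations[i]) > gencountlimit:
                genpacks = sorted(generations[i], reverse=True,
                                  key=lambda n: sizes[n])
                break

        # Start from the three smallest packs, then keep adding while the
        # running total stays under repacksizelimit and maxrepackpacks.
        chosen = genpacks[-3:]
        genpacks = genpacks[:-3]
        repacksize = sum(sizes[n] for n in chosen)
        while (repacksize < repacksizelimit and genpacks and
               len(chosen) < maxrepackpacks):
            chosen.append(genpacks.pop())
            repacksize += sizes[chosen[-1]]
        return chosen

    sizes = {'p1': 10, 'p2': 20, 'p3': 30, 'p4': 40, 'huge': 2000}
    print(choosepacks(sizes, [1000], gencountlimit=2,
                      repacksizelimit=100, maxrepackpacks=10))

Run directly, this prints ['p3', 'p2', 'p1', 'p4']: the four packs below the 1000-byte boundary form one generation, and all of them fit under the size and count limits.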