remotefilelog: use list comprehension instead of filter for py3 portability...
Augie Fackler
r41291:60b3edcc default
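The one-line change in this diff (line 484) is a Python 3 portability fix in repacker._chainorphans(): on Python 3, filter() returns a lazy iterator rather than a list, so the following "nodes += orphans" concatenation would raise TypeError. A minimal standalone sketch of the difference, using hypothetical node values rather than anything from the patch:

    # Hypothetical inputs, just to illustrate the py2/py3 behaviour.
    nodes = [b'n1', b'n2', b'n3']
    orphans = [b'n2']

    # Old form: on Python 2 filter() returns a list, but on Python 3 it
    # returns a lazy iterator, so a later "nodes += orphans" would fail.
    legacy = filter(lambda node: node not in orphans, nodes)

    # New form from the patch: a list comprehension yields a real list on
    # both Python 2 and Python 3, so list concatenation keeps working.
    nodes = [n for n in nodes if n not in orphans]
    nodes += orphans
    print(nodes)   # -> [b'n1', b'n3', b'n2'] on Python 3

Because the comprehension always produces a concrete list, the later length checks and concatenations in _chainorphans behave identically on both interpreters.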
@@ -1,778 +1,778 @@
1 1 from __future__ import absolute_import
2 2
3 3 import os
4 4 import time
5 5
6 6 from mercurial.i18n import _
7 7 from mercurial.node import (
8 8 nullid,
9 9 short,
10 10 )
11 11 from mercurial import (
12 12 encoding,
13 13 error,
14 14 mdiff,
15 15 policy,
16 16 pycompat,
17 17 scmutil,
18 18 util,
19 19 vfs,
20 20 )
21 21 from mercurial.utils import procutil
22 22 from . import (
23 23 constants,
24 24 contentstore,
25 25 datapack,
26 26 extutil,
27 27 historypack,
28 28 metadatastore,
29 29 shallowutil,
30 30 )
31 31
32 32 osutil = policy.importmod(r'osutil')
33 33
34 34 class RepackAlreadyRunning(error.Abort):
35 35 pass
36 36
37 37 def backgroundrepack(repo, incremental=True, packsonly=False):
38 38 cmd = [procutil.hgexecutable(), '-R', repo.origroot, 'repack']
39 39 msg = _("(running background repack)\n")
40 40 if incremental:
41 41 cmd.append('--incremental')
42 42 msg = _("(running background incremental repack)\n")
43 43 if packsonly:
44 44 cmd.append('--packsonly')
45 45 repo.ui.warn(msg)
46 46 procutil.runbgcommand(cmd, encoding.environ)
47 47
48 48 def fullrepack(repo, options=None):
49 49 """If ``packsonly`` is True, stores creating only loose objects are skipped.
50 50 """
51 51 if util.safehasattr(repo, 'shareddatastores'):
52 52 datasource = contentstore.unioncontentstore(
53 53 *repo.shareddatastores)
54 54 historysource = metadatastore.unionmetadatastore(
55 55 *repo.sharedhistorystores,
56 56 allowincomplete=True)
57 57
58 58 packpath = shallowutil.getcachepackpath(
59 59 repo,
60 60 constants.FILEPACK_CATEGORY)
61 61 _runrepack(repo, datasource, historysource, packpath,
62 62 constants.FILEPACK_CATEGORY, options=options)
63 63
64 64 if util.safehasattr(repo.manifestlog, 'datastore'):
65 65 localdata, shareddata = _getmanifeststores(repo)
66 66 lpackpath, ldstores, lhstores = localdata
67 67 spackpath, sdstores, shstores = shareddata
68 68
69 69 # Repack the shared manifest store
70 70 datasource = contentstore.unioncontentstore(*sdstores)
71 71 historysource = metadatastore.unionmetadatastore(
72 72 *shstores,
73 73 allowincomplete=True)
74 74 _runrepack(repo, datasource, historysource, spackpath,
75 75 constants.TREEPACK_CATEGORY, options=options)
76 76
77 77 # Repack the local manifest store
78 78 datasource = contentstore.unioncontentstore(
79 79 *ldstores,
80 80 allowincomplete=True)
81 81 historysource = metadatastore.unionmetadatastore(
82 82 *lhstores,
83 83 allowincomplete=True)
84 84 _runrepack(repo, datasource, historysource, lpackpath,
85 85 constants.TREEPACK_CATEGORY, options=options)
86 86
87 87 def incrementalrepack(repo, options=None):
88 88 """This repacks the repo by looking at the distribution of pack files in the
89 89 repo and performing the most minimal repack to keep the repo in good shape.
90 90 """
91 91 if util.safehasattr(repo, 'shareddatastores'):
92 92 packpath = shallowutil.getcachepackpath(
93 93 repo,
94 94 constants.FILEPACK_CATEGORY)
95 95 _incrementalrepack(repo,
96 96 repo.shareddatastores,
97 97 repo.sharedhistorystores,
98 98 packpath,
99 99 constants.FILEPACK_CATEGORY,
100 100 options=options)
101 101
102 102 if util.safehasattr(repo.manifestlog, 'datastore'):
103 103 localdata, shareddata = _getmanifeststores(repo)
104 104 lpackpath, ldstores, lhstores = localdata
105 105 spackpath, sdstores, shstores = shareddata
106 106
107 107 # Repack the shared manifest store
108 108 _incrementalrepack(repo,
109 109 sdstores,
110 110 shstores,
111 111 spackpath,
112 112 constants.TREEPACK_CATEGORY,
113 113 options=options)
114 114
115 115 # Repack the local manifest store
116 116 _incrementalrepack(repo,
117 117 ldstores,
118 118 lhstores,
119 119 lpackpath,
120 120 constants.TREEPACK_CATEGORY,
121 121 allowincompletedata=True,
122 122 options=options)
123 123
124 124 def _getmanifeststores(repo):
125 125 shareddatastores = repo.manifestlog.shareddatastores
126 126 localdatastores = repo.manifestlog.localdatastores
127 127 sharedhistorystores = repo.manifestlog.sharedhistorystores
128 128 localhistorystores = repo.manifestlog.localhistorystores
129 129
130 130 sharedpackpath = shallowutil.getcachepackpath(repo,
131 131 constants.TREEPACK_CATEGORY)
132 132 localpackpath = shallowutil.getlocalpackpath(repo.svfs.vfs.base,
133 133 constants.TREEPACK_CATEGORY)
134 134
135 135 return ((localpackpath, localdatastores, localhistorystores),
136 136 (sharedpackpath, shareddatastores, sharedhistorystores))
137 137
138 138 def _topacks(packpath, files, constructor):
139 139 paths = list(os.path.join(packpath, p) for p in files)
140 140 packs = list(constructor(p) for p in paths)
141 141 return packs
142 142
143 143 def _deletebigpacks(repo, folder, files):
144 144 """Deletes packfiles that are bigger than ``packs.maxpacksize``.
145 145
 146 146 Returns ``files`` with the removed files omitted."""
147 147 maxsize = repo.ui.configbytes("packs", "maxpacksize")
148 148 if maxsize <= 0:
149 149 return files
150 150
151 151 # This only considers datapacks today, but we could broaden it to include
152 152 # historypacks.
153 153 VALIDEXTS = [".datapack", ".dataidx"]
154 154
155 155 # Either an oversize index or datapack will trigger cleanup of the whole
156 156 # pack:
157 157 oversized = set([os.path.splitext(path)[0] for path, ftype, stat in files
158 158 if (stat.st_size > maxsize and (os.path.splitext(path)[1]
159 159 in VALIDEXTS))])
160 160
161 161 for rootfname in oversized:
162 162 rootpath = os.path.join(folder, rootfname)
163 163 for ext in VALIDEXTS:
164 164 path = rootpath + ext
165 165 repo.ui.debug('removing oversize packfile %s (%s)\n' %
166 166 (path, util.bytecount(os.stat(path).st_size)))
167 167 os.unlink(path)
168 168 return [row for row in files if os.path.basename(row[0]) not in oversized]
169 169
170 170 def _incrementalrepack(repo, datastore, historystore, packpath, category,
171 171 allowincompletedata=False, options=None):
172 172 shallowutil.mkstickygroupdir(repo.ui, packpath)
173 173
174 174 files = osutil.listdir(packpath, stat=True)
175 175 files = _deletebigpacks(repo, packpath, files)
176 176 datapacks = _topacks(packpath,
177 177 _computeincrementaldatapack(repo.ui, files),
178 178 datapack.datapack)
179 179 datapacks.extend(s for s in datastore
180 180 if not isinstance(s, datapack.datapackstore))
181 181
182 182 historypacks = _topacks(packpath,
183 183 _computeincrementalhistorypack(repo.ui, files),
184 184 historypack.historypack)
185 185 historypacks.extend(s for s in historystore
186 186 if not isinstance(s, historypack.historypackstore))
187 187
188 188 # ``allhistory{files,packs}`` contains all known history packs, even ones we
189 189 # don't plan to repack. They are used during the datapack repack to ensure
190 190 # good ordering of nodes.
191 191 allhistoryfiles = _allpackfileswithsuffix(files, historypack.PACKSUFFIX,
192 192 historypack.INDEXSUFFIX)
193 193 allhistorypacks = _topacks(packpath,
194 194 (f for f, mode, stat in allhistoryfiles),
195 195 historypack.historypack)
196 196 allhistorypacks.extend(s for s in historystore
197 197 if not isinstance(s, historypack.historypackstore))
198 198 _runrepack(repo,
199 199 contentstore.unioncontentstore(
200 200 *datapacks,
201 201 allowincomplete=allowincompletedata),
202 202 metadatastore.unionmetadatastore(
203 203 *historypacks,
204 204 allowincomplete=True),
205 205 packpath, category,
206 206 fullhistory=metadatastore.unionmetadatastore(
207 207 *allhistorypacks,
208 208 allowincomplete=True),
209 209 options=options)
210 210
211 211 def _computeincrementaldatapack(ui, files):
212 212 opts = {
213 213 'gencountlimit' : ui.configint(
214 214 'remotefilelog', 'data.gencountlimit'),
215 215 'generations' : ui.configlist(
216 216 'remotefilelog', 'data.generations'),
217 217 'maxrepackpacks' : ui.configint(
218 218 'remotefilelog', 'data.maxrepackpacks'),
219 219 'repackmaxpacksize' : ui.configbytes(
220 220 'remotefilelog', 'data.repackmaxpacksize'),
221 221 'repacksizelimit' : ui.configbytes(
222 222 'remotefilelog', 'data.repacksizelimit'),
223 223 }
224 224
225 225 packfiles = _allpackfileswithsuffix(
226 226 files, datapack.PACKSUFFIX, datapack.INDEXSUFFIX)
227 227 return _computeincrementalpack(packfiles, opts)
228 228
229 229 def _computeincrementalhistorypack(ui, files):
230 230 opts = {
231 231 'gencountlimit' : ui.configint(
232 232 'remotefilelog', 'history.gencountlimit'),
233 233 'generations' : ui.configlist(
234 234 'remotefilelog', 'history.generations', ['100MB']),
235 235 'maxrepackpacks' : ui.configint(
236 236 'remotefilelog', 'history.maxrepackpacks'),
237 237 'repackmaxpacksize' : ui.configbytes(
238 238 'remotefilelog', 'history.repackmaxpacksize', '400MB'),
239 239 'repacksizelimit' : ui.configbytes(
240 240 'remotefilelog', 'history.repacksizelimit'),
241 241 }
242 242
243 243 packfiles = _allpackfileswithsuffix(
244 244 files, historypack.PACKSUFFIX, historypack.INDEXSUFFIX)
245 245 return _computeincrementalpack(packfiles, opts)
246 246
247 247 def _allpackfileswithsuffix(files, packsuffix, indexsuffix):
248 248 result = []
249 249 fileset = set(fn for fn, mode, stat in files)
250 250 for filename, mode, stat in files:
251 251 if not filename.endswith(packsuffix):
252 252 continue
253 253
254 254 prefix = filename[:-len(packsuffix)]
255 255
256 256 # Don't process a pack if it doesn't have an index.
257 257 if (prefix + indexsuffix) not in fileset:
258 258 continue
259 259 result.append((prefix, mode, stat))
260 260
261 261 return result
262 262
263 263 def _computeincrementalpack(files, opts):
264 264 """Given a set of pack files along with the configuration options, this
265 265 function computes the list of files that should be packed as part of an
266 266 incremental repack.
267 267
268 268 It tries to strike a balance between keeping incremental repacks cheap (i.e.
269 269 packing small things when possible, and rolling the packs up to the big ones
270 270 over time).
271 271 """
272 272
273 273 limits = list(sorted((util.sizetoint(s) for s in opts['generations']),
274 274 reverse=True))
275 275 limits.append(0)
276 276
277 277 # Group the packs by generation (i.e. by size)
278 278 generations = []
279 279 for i in pycompat.xrange(len(limits)):
280 280 generations.append([])
281 281
282 282 sizes = {}
283 283 for prefix, mode, stat in files:
284 284 size = stat.st_size
285 285 if size > opts['repackmaxpacksize']:
286 286 continue
287 287
288 288 sizes[prefix] = size
289 289 for i, limit in enumerate(limits):
290 290 if size > limit:
291 291 generations[i].append(prefix)
292 292 break
293 293
294 294 # Steps for picking what packs to repack:
295 295 # 1. Pick the largest generation with > gencountlimit pack files.
296 296 # 2. Take the smallest three packs.
297 297 # 3. While total-size-of-packs < repacksizelimit: add another pack
298 298
299 299 # Find the largest generation with more than gencountlimit packs
300 300 genpacks = []
301 301 for i, limit in enumerate(limits):
302 302 if len(generations[i]) > opts['gencountlimit']:
303 303 # Sort to be smallest last, for easy popping later
304 304 genpacks.extend(sorted(generations[i], reverse=True,
305 305 key=lambda x: sizes[x]))
306 306 break
307 307
308 308 # Take as many packs from the generation as we can
309 309 chosenpacks = genpacks[-3:]
310 310 genpacks = genpacks[:-3]
311 311 repacksize = sum(sizes[n] for n in chosenpacks)
312 312 while (repacksize < opts['repacksizelimit'] and genpacks and
313 313 len(chosenpacks) < opts['maxrepackpacks']):
314 314 chosenpacks.append(genpacks.pop())
315 315 repacksize += sizes[chosenpacks[-1]]
316 316
317 317 return chosenpacks
318 318
319 319 def _runrepack(repo, data, history, packpath, category, fullhistory=None,
320 320 options=None):
321 321 shallowutil.mkstickygroupdir(repo.ui, packpath)
322 322
323 323 def isold(repo, filename, node):
324 324 """Check if the file node is older than a limit.
325 325 Unless a limit is specified in the config the default limit is taken.
326 326 """
327 327 filectx = repo.filectx(filename, fileid=node)
328 328 filetime = repo[filectx.linkrev()].date()
329 329
330 330 ttl = repo.ui.configint('remotefilelog', 'nodettl')
331 331
332 332 limit = time.time() - ttl
333 333 return filetime[0] < limit
334 334
335 335 garbagecollect = repo.ui.configbool('remotefilelog', 'gcrepack')
336 336 if not fullhistory:
337 337 fullhistory = history
338 338 packer = repacker(repo, data, history, fullhistory, category,
339 339 gc=garbagecollect, isold=isold, options=options)
340 340
341 341 with datapack.mutabledatapack(repo.ui, packpath, version=2) as dpack:
342 342 with historypack.mutablehistorypack(repo.ui, packpath) as hpack:
343 343 try:
344 344 packer.run(dpack, hpack)
345 345 except error.LockHeld:
346 346 raise RepackAlreadyRunning(_("skipping repack - another repack "
347 347 "is already running"))
348 348
349 349 def keepset(repo, keyfn, lastkeepkeys=None):
350 350 """Computes a keepset which is not garbage collected.
351 351 'keyfn' is a function that maps filename, node to a unique key.
352 352 'lastkeepkeys' is an optional argument and if provided the keepset
353 353 function updates lastkeepkeys with more keys and returns the result.
354 354 """
355 355 if not lastkeepkeys:
356 356 keepkeys = set()
357 357 else:
358 358 keepkeys = lastkeepkeys
359 359
360 360 # We want to keep:
361 361 # 1. Working copy parent
362 362 # 2. Draft commits
363 363 # 3. Parents of draft commits
364 364 # 4. Pullprefetch and bgprefetchrevs revsets if specified
365 365 revs = ['.', 'draft()', 'parents(draft())']
366 366 prefetchrevs = repo.ui.config('remotefilelog', 'pullprefetch', None)
367 367 if prefetchrevs:
368 368 revs.append('(%s)' % prefetchrevs)
369 369 prefetchrevs = repo.ui.config('remotefilelog', 'bgprefetchrevs', None)
370 370 if prefetchrevs:
371 371 revs.append('(%s)' % prefetchrevs)
372 372 revs = '+'.join(revs)
373 373
374 374 revs = ['sort((%s), "topo")' % revs]
375 375 keep = scmutil.revrange(repo, revs)
376 376
377 377 processed = set()
378 378 lastmanifest = None
379 379
380 380 # process the commits in toposorted order starting from the oldest
381 381 for r in reversed(keep._list):
382 382 if repo[r].p1().rev() in processed:
383 383 # if the direct parent has already been processed
384 384 # then we only need to process the delta
385 385 m = repo[r].manifestctx().readdelta()
386 386 else:
387 387 # otherwise take the manifest and diff it
388 388 # with the previous manifest if one exists
389 389 if lastmanifest:
390 390 m = repo[r].manifest().diff(lastmanifest)
391 391 else:
392 392 m = repo[r].manifest()
393 393 lastmanifest = repo[r].manifest()
394 394 processed.add(r)
395 395
396 396 # populate keepkeys with keys from the current manifest
397 397 if type(m) is dict:
398 398 # m is a result of diff of two manifests and is a dictionary that
399 399 # maps filename to ((newnode, newflag), (oldnode, oldflag)) tuple
400 400 for filename, diff in m.iteritems():
401 401 if diff[0][0] is not None:
402 402 keepkeys.add(keyfn(filename, diff[0][0]))
403 403 else:
404 404 # m is a manifest object
405 405 for filename, filenode in m.iteritems():
406 406 keepkeys.add(keyfn(filename, filenode))
407 407
408 408 return keepkeys
409 409
410 410 class repacker(object):
411 411 """Class for orchestrating the repack of data and history information into a
412 412 new format.
413 413 """
414 414 def __init__(self, repo, data, history, fullhistory, category, gc=False,
415 415 isold=None, options=None):
416 416 self.repo = repo
417 417 self.data = data
418 418 self.history = history
419 419 self.fullhistory = fullhistory
420 420 self.unit = constants.getunits(category)
421 421 self.garbagecollect = gc
422 422 self.options = options
423 423 if self.garbagecollect:
424 424 if not isold:
425 425 raise ValueError("Function 'isold' is not properly specified")
426 426 # use (filename, node) tuple as a keepset key
427 427 self.keepkeys = keepset(repo, lambda f, n : (f, n))
428 428 self.isold = isold
429 429
430 430 def run(self, targetdata, targethistory):
431 431 ledger = repackledger()
432 432
433 433 with extutil.flock(repacklockvfs(self.repo).join("repacklock"),
434 434 _('repacking %s') % self.repo.origroot, timeout=0):
435 435 self.repo.hook('prerepack')
436 436
437 437 # Populate ledger from source
438 438 self.data.markledger(ledger, options=self.options)
439 439 self.history.markledger(ledger, options=self.options)
440 440
441 441 # Run repack
442 442 self.repackdata(ledger, targetdata)
443 443 self.repackhistory(ledger, targethistory)
444 444
445 445 # Call cleanup on each source
446 446 for source in ledger.sources:
447 447 source.cleanup(ledger)
448 448
449 449 def _chainorphans(self, ui, filename, nodes, orphans, deltabases):
 450 450 """Reorders ``orphans`` into a single chain inside ``nodes`` and
451 451 ``deltabases``.
452 452
453 453 We often have orphan entries (nodes without a base that aren't
454 454 referenced by other nodes -- i.e., part of a chain) due to gaps in
455 455 history. Rather than store them as individual fulltexts, we prefer to
456 456 insert them as one chain sorted by size.
457 457 """
458 458 if not orphans:
459 459 return nodes
460 460
461 461 def getsize(node, default=0):
462 462 meta = self.data.getmeta(filename, node)
463 463 if constants.METAKEYSIZE in meta:
464 464 return meta[constants.METAKEYSIZE]
465 465 else:
466 466 return default
467 467
468 468 # Sort orphans by size; biggest first is preferred, since it's more
469 469 # likely to be the newest version assuming files grow over time.
470 470 # (Sort by node first to ensure the sort is stable.)
471 471 orphans = sorted(orphans)
472 472 orphans = list(sorted(orphans, key=getsize, reverse=True))
473 473 if ui.debugflag:
474 474 ui.debug("%s: orphan chain: %s\n" % (filename,
475 475 ", ".join([short(s) for s in orphans])))
476 476
477 477 # Create one contiguous chain and reassign deltabases.
478 478 for i, node in enumerate(orphans):
479 479 if i == 0:
480 480 deltabases[node] = (nullid, 0)
481 481 else:
482 482 parent = orphans[i - 1]
483 483 deltabases[node] = (parent, deltabases[parent][1] + 1)
484 nodes = filter(lambda node: node not in orphans, nodes)
484 nodes = [n for n in nodes if n not in orphans]
485 485 nodes += orphans
486 486 return nodes
487 487
488 488 def repackdata(self, ledger, target):
489 489 ui = self.repo.ui
490 490 maxchainlen = ui.configint('packs', 'maxchainlen', 1000)
491 491
492 492 byfile = {}
493 493 for entry in ledger.entries.itervalues():
494 494 if entry.datasource:
495 495 byfile.setdefault(entry.filename, {})[entry.node] = entry
496 496
497 497 count = 0
498 498 repackprogress = ui.makeprogress(_("repacking data"), unit=self.unit,
499 499 total=len(byfile))
500 500 for filename, entries in sorted(byfile.iteritems()):
501 501 repackprogress.update(count)
502 502
503 503 ancestors = {}
504 504 nodes = list(node for node in entries)
505 505 nohistory = []
506 506 buildprogress = ui.makeprogress(_("building history"), unit='nodes',
507 507 total=len(nodes))
508 508 for i, node in enumerate(nodes):
509 509 if node in ancestors:
510 510 continue
511 511 buildprogress.update(i)
512 512 try:
513 513 ancestors.update(self.fullhistory.getancestors(filename,
514 514 node, known=ancestors))
515 515 except KeyError:
516 516 # Since we're packing data entries, we may not have the
517 517 # corresponding history entries for them. It's not a big
518 518 # deal, but the entries won't be delta'd perfectly.
519 519 nohistory.append(node)
520 520 buildprogress.complete()
521 521
522 522 # Order the nodes children first, so we can produce reverse deltas
523 523 orderednodes = list(reversed(self._toposort(ancestors)))
524 524 if len(nohistory) > 0:
525 525 ui.debug('repackdata: %d nodes without history\n' %
526 526 len(nohistory))
527 527 orderednodes.extend(sorted(nohistory))
528 528
529 529 # Filter orderednodes to just the nodes we want to serialize (it
530 530 # currently also has the edge nodes' ancestors).
531 531 orderednodes = list(filter(lambda node: node in nodes,
532 532 orderednodes))
533 533
534 534 # Garbage collect old nodes:
535 535 if self.garbagecollect:
536 536 neworderednodes = []
537 537 for node in orderednodes:
538 538 # If the node is old and is not in the keepset, we skip it,
539 539 # and mark as garbage collected
540 540 if ((filename, node) not in self.keepkeys and
541 541 self.isold(self.repo, filename, node)):
542 542 entries[node].gced = True
543 543 continue
544 544 neworderednodes.append(node)
545 545 orderednodes = neworderednodes
546 546
547 547 # Compute delta bases for nodes:
548 548 deltabases = {}
549 549 nobase = set()
550 550 referenced = set()
551 551 nodes = set(nodes)
552 552 processprogress = ui.makeprogress(_("processing nodes"),
553 553 unit='nodes',
554 554 total=len(orderednodes))
555 555 for i, node in enumerate(orderednodes):
556 556 processprogress.update(i)
557 557 # Find delta base
558 558 # TODO: allow delta'ing against most recent descendant instead
559 559 # of immediate child
560 560 deltatuple = deltabases.get(node, None)
561 561 if deltatuple is None:
562 562 deltabase, chainlen = nullid, 0
563 563 deltabases[node] = (nullid, 0)
564 564 nobase.add(node)
565 565 else:
566 566 deltabase, chainlen = deltatuple
567 567 referenced.add(deltabase)
568 568
569 569 # Use available ancestor information to inform our delta choices
570 570 ancestorinfo = ancestors.get(node)
571 571 if ancestorinfo:
572 572 p1, p2, linknode, copyfrom = ancestorinfo
573 573
574 574 # The presence of copyfrom means we're at a point where the
575 575 # file was copied from elsewhere. So don't attempt to do any
576 576 # deltas with the other file.
577 577 if copyfrom:
578 578 p1 = nullid
579 579
580 580 if chainlen < maxchainlen:
581 581 # Record this child as the delta base for its parents.
 582 582 # This may be non-optimal, since the parents may have
583 583 # many children, and this will only choose the last one.
584 584 # TODO: record all children and try all deltas to find
585 585 # best
586 586 if p1 != nullid:
587 587 deltabases[p1] = (node, chainlen + 1)
588 588 if p2 != nullid:
589 589 deltabases[p2] = (node, chainlen + 1)
590 590
591 591 # experimental config: repack.chainorphansbysize
592 592 if ui.configbool('repack', 'chainorphansbysize'):
593 593 orphans = nobase - referenced
594 594 orderednodes = self._chainorphans(ui, filename, orderednodes,
595 595 orphans, deltabases)
596 596
597 597 # Compute deltas and write to the pack
598 598 for i, node in enumerate(orderednodes):
599 599 deltabase, chainlen = deltabases[node]
600 600 # Compute delta
601 601 # TODO: Optimize the deltachain fetching. Since we're
602 602 # iterating over the different version of the file, we may
603 603 # be fetching the same deltachain over and over again.
604 604 meta = None
605 605 if deltabase != nullid:
606 606 deltaentry = self.data.getdelta(filename, node)
607 607 delta, deltabasename, origdeltabase, meta = deltaentry
608 608 size = meta.get(constants.METAKEYSIZE)
609 609 if (deltabasename != filename or origdeltabase != deltabase
610 610 or size is None):
611 611 deltabasetext = self.data.get(filename, deltabase)
612 612 original = self.data.get(filename, node)
613 613 size = len(original)
614 614 delta = mdiff.textdiff(deltabasetext, original)
615 615 else:
616 616 delta = self.data.get(filename, node)
617 617 size = len(delta)
618 618 meta = self.data.getmeta(filename, node)
619 619
620 620 # TODO: don't use the delta if it's larger than the fulltext
621 621 if constants.METAKEYSIZE not in meta:
622 622 meta[constants.METAKEYSIZE] = size
623 623 target.add(filename, node, deltabase, delta, meta)
624 624
625 625 entries[node].datarepacked = True
626 626
627 627 processprogress.complete()
628 628 count += 1
629 629
630 630 repackprogress.complete()
631 631 target.close(ledger=ledger)
632 632
633 633 def repackhistory(self, ledger, target):
634 634 ui = self.repo.ui
635 635
636 636 byfile = {}
637 637 for entry in ledger.entries.itervalues():
638 638 if entry.historysource:
639 639 byfile.setdefault(entry.filename, {})[entry.node] = entry
640 640
641 641 progress = ui.makeprogress(_("repacking history"), unit=self.unit,
642 642 total=len(byfile))
643 643 for filename, entries in sorted(byfile.iteritems()):
644 644 ancestors = {}
645 645 nodes = list(node for node in entries)
646 646
647 647 for node in nodes:
648 648 if node in ancestors:
649 649 continue
650 650 ancestors.update(self.history.getancestors(filename, node,
651 651 known=ancestors))
652 652
653 653 # Order the nodes children first
654 654 orderednodes = reversed(self._toposort(ancestors))
655 655
656 656 # Write to the pack
657 657 dontprocess = set()
658 658 for node in orderednodes:
659 659 p1, p2, linknode, copyfrom = ancestors[node]
660 660
661 661 # If the node is marked dontprocess, but it's also in the
662 662 # explicit entries set, that means the node exists both in this
663 663 # file and in another file that was copied to this file.
664 664 # Usually this happens if the file was copied to another file,
665 665 # then the copy was deleted, then reintroduced without copy
666 666 # metadata. The original add and the new add have the same hash
667 667 # since the content is identical and the parents are null.
668 668 if node in dontprocess and node not in entries:
669 669 # If copyfrom == filename, it means the copy history
 670 670 # went to some other file, then came back to this one, so we
671 671 # should continue processing it.
672 672 if p1 != nullid and copyfrom != filename:
673 673 dontprocess.add(p1)
674 674 if p2 != nullid:
675 675 dontprocess.add(p2)
676 676 continue
677 677
678 678 if copyfrom:
679 679 dontprocess.add(p1)
680 680
681 681 target.add(filename, node, p1, p2, linknode, copyfrom)
682 682
683 683 if node in entries:
684 684 entries[node].historyrepacked = True
685 685
686 686 progress.increment()
687 687
688 688 progress.complete()
689 689 target.close(ledger=ledger)
690 690
691 691 def _toposort(self, ancestors):
692 692 def parentfunc(node):
693 693 p1, p2, linknode, copyfrom = ancestors[node]
694 694 parents = []
695 695 if p1 != nullid:
696 696 parents.append(p1)
697 697 if p2 != nullid:
698 698 parents.append(p2)
699 699 return parents
700 700
701 701 sortednodes = shallowutil.sortnodes(ancestors.keys(), parentfunc)
702 702 return sortednodes
703 703
704 704 class repackledger(object):
705 705 """Storage for all the bookkeeping that happens during a repack. It contains
706 706 the list of revisions being repacked, what happened to each revision, and
707 707 which source store contained which revision originally (for later cleanup).
708 708 """
709 709 def __init__(self):
710 710 self.entries = {}
711 711 self.sources = {}
712 712 self.created = set()
713 713
714 714 def markdataentry(self, source, filename, node):
715 715 """Mark the given filename+node revision as having a data rev in the
716 716 given source.
717 717 """
718 718 entry = self._getorcreateentry(filename, node)
719 719 entry.datasource = True
720 720 entries = self.sources.get(source)
721 721 if not entries:
722 722 entries = set()
723 723 self.sources[source] = entries
724 724 entries.add(entry)
725 725
726 726 def markhistoryentry(self, source, filename, node):
727 727 """Mark the given filename+node revision as having a history rev in the
728 728 given source.
729 729 """
730 730 entry = self._getorcreateentry(filename, node)
731 731 entry.historysource = True
732 732 entries = self.sources.get(source)
733 733 if not entries:
734 734 entries = set()
735 735 self.sources[source] = entries
736 736 entries.add(entry)
737 737
738 738 def _getorcreateentry(self, filename, node):
739 739 key = (filename, node)
740 740 value = self.entries.get(key)
741 741 if not value:
742 742 value = repackentry(filename, node)
743 743 self.entries[key] = value
744 744
745 745 return value
746 746
747 747 def addcreated(self, value):
748 748 self.created.add(value)
749 749
750 750 class repackentry(object):
751 751 """Simple class representing a single revision entry in the repackledger.
752 752 """
753 753 __slots__ = (r'filename', r'node', r'datasource', r'historysource',
754 754 r'datarepacked', r'historyrepacked', r'gced')
755 755 def __init__(self, filename, node):
756 756 self.filename = filename
757 757 self.node = node
758 758 # If the revision has a data entry in the source
759 759 self.datasource = False
760 760 # If the revision has a history entry in the source
761 761 self.historysource = False
762 762 # If the revision's data entry was repacked into the repack target
763 763 self.datarepacked = False
764 764 # If the revision's history entry was repacked into the repack target
765 765 self.historyrepacked = False
766 766 # If garbage collected
767 767 self.gced = False
768 768
769 769 def repacklockvfs(repo):
770 770 if util.safehasattr(repo, 'name'):
771 771 # Lock in the shared cache so repacks across multiple copies of the same
772 772 # repo are coordinated.
773 773 sharedcachepath = shallowutil.getcachepackpath(
774 774 repo,
775 775 constants.FILEPACK_CATEGORY)
776 776 return vfs.vfs(sharedcachepath)
777 777 else:
778 778 return repo.svfs
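
For readers tracing the incremental repack selection in _computeincrementalpack above (bucket packs into size generations, pick the first generation holding more than gencountlimit packs, then greedily add the smallest packs while staying under the size and count limits), here is a condensed standalone sketch. The function name, flattened arguments, and plain dict input are illustrative only; the real code reads these values from the remotefilelog config section and operates on osutil.listdir() stat entries:

    def pick_packs_to_repack(pack_sizes, generation_limits, gencountlimit,
                             repackmaxpacksize, repacksizelimit,
                             maxrepackpacks):
        """Sketch of the generation-based incremental pack selection.

        ``pack_sizes`` maps pack name -> size in bytes; the remaining
        arguments mirror the remotefilelog data.*/history.* config knobs.
        """
        # Size thresholds, largest first, with a catch-all 0 at the end.
        limits = sorted(generation_limits, reverse=True) + [0]
        buckets = [[] for _ in limits]

        sizes = {}
        for name, size in pack_sizes.items():
            if size > repackmaxpacksize:
                continue  # never repack packs that are already huge
            sizes[name] = size
            for i, limit in enumerate(limits):
                if size > limit:
                    buckets[i].append(name)
                    break

        # First generation (largest sizes first) that holds too many packs.
        candidates = []
        for bucket in buckets:
            if len(bucket) > gencountlimit:
                # Smallest last, so pop() below yields the smallest remaining.
                candidates = sorted(bucket, key=lambda n: sizes[n],
                                    reverse=True)
                break

        chosen = candidates[-3:]          # start with the three smallest
        candidates = candidates[:-3]
        total = sum(sizes[n] for n in chosen)
        while (total < repacksizelimit and candidates
               and len(chosen) < maxrepackpacks):
            chosen.append(candidates.pop())
            total += sizes[chosen[-1]]
        return chosen

With made-up numbers, pick_packs_to_repack({'a': 10, 'b': 20, 'c': 300, 'd': 15, 'e': 12}, [100], 2, 1000, 500, 10) would leave 'c' alone in the large generation and select the four small packs 'd', 'e', 'a', 'b' for repacking.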