##// END OF EJS Templates
stream: prefer keeping an open file handle to volatile file instead of copy...
marmoute -
r52912:a47f09da default
parent child Browse files
Show More
@@ -567,53 +567,113 b' def _filterfull(entry, copy, vfsmap):'
567 567
568 568
569 569 class VolatileManager:
570 """Manage temporary backup of volatile file during stream clone
570 """Manage temporary backups of volatile files during stream clone.
571 571
572 This should be used as a Python context, the copies will be discarded when
573 exiting the context.
572 This class will keep open file handles for the volatile files, writing the
573 smaller ones on disk if the number of open file handles grow too much.
574 574
575 A copy can be done by calling the object on the real path (encoded full
576 path)
575 This should be used as a Python context, the file handles and copies will
576 be discarded when exiting the context.
577 577
578 The backup path can be retrieved using the __getitem__ protocol, obj[path].
579 On file without backup, it will return the unmodified path. (equivalent to
580 `dict.get(x, x)`)
578 The preservation can be done by calling the object on the real path
579 (encoded full path).
580
581 Valid filehandles for any file should be retrieved by calling `open(path)`.
581 582 """
582 583
584 # arbitrarily picked as "it seemed fine" and much higher than the current
585 # usage.
586 MAX_OPEN = 100
587
583 588 def __init__(self):
589 self._counter = 0
590 self._volatile_fps = None
584 591 self._copies = None
585 592 self._dst_dir = None
586 593
587 594 def __enter__(self):
588 if self._copies is not None:
589 msg = "Copies context already open"
590 raise error.ProgrammingError(msg)
595 if self._counter == 0:
596 assert self._volatile_fps is None
597 self._volatile_fps = {}
598 self._counter += 1
599 return self
600
601 def __exit__(self, *args, **kwars):
602 """discard all backups"""
603 self._counter -= 1
604 if self._counter == 0:
605 for _size, fp in self._volatile_fps.values():
606 fp.close()
607 self._volatile_fps = None
608 if self._copies is not None:
609 for tmp in self._copies.values():
610 util.tryunlink(tmp)
611 util.tryrmdir(self._dst_dir)
612 self._copies = None
613 self._dst_dir = None
614 assert self._volatile_fps is None
615 assert self._copies is None
616 assert self._dst_dir is None
617
618 def _init_tmp_copies(self):
619 """prepare a temporary directory to save volatile files
620
621 This will be used as backup if we have too many files open"""
622 assert 0 < self._counter
623 assert self._copies is None
624 assert self._dst_dir is None
591 625 self._copies = {}
592 626 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
593 return self
627
628 def _flush_some_on_disk(self):
629 """move some of the open files to tempory files on disk"""
630 if self._copies is None:
631 self._init_tmp_copies()
632 flush_count = self.MAX_OPEN // 2
633 for src, (size, fp) in sorted(self._volatile_fps.items())[:flush_count]:
634 prefix = os.path.basename(src)
635 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
636 self._copies[src] = dst
637 os.close(fd)
638 # we no longer hardlink, but on the other hand we rarely do this,
639 # and we do it for the smallest file only and not at all in the
640 # common case.
641 with open(dst, 'wb') as bck:
642 fp.seek(0)
643 bck.write(fp.read())
644 del self._volatile_fps[src]
645 fp.close()
646
647 def _keep_one(self, src):
648 """preserve an open file handle for a given path"""
649 # store the file quickly to ensure we close it if any error happens
650 _, fp = self._volatile_fps[src] = (None, open(src, 'rb'))
651 fp.seek(0, os.SEEK_END)
652 size = fp.tell()
653 self._volatile_fps[src] = (size, fp)
594 654
595 655 def __call__(self, src):
596 """create a backup of the file at src"""
597 prefix = os.path.basename(src)
598 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
599 os.close(fd)
600 self._copies[src] = dst
601 util.copyfiles(src, dst, hardlink=True)
602 return dst
656 """preserve the volatile file at src"""
657 assert 0 < self._counter
658 if len(self._volatile_fps) >= (self.MAX_OPEN - 1):
659 self._flush_some_on_disk()
660 self._keep_one(src)
603 661
604 662 @contextlib.contextmanager
605 663 def open(self, src):
606 actual_path = self._copies.get(src, src)
607 with open(actual_path, 'rb') as fp:
664 assert 0 < self._counter
665 entry = self._volatile_fps.get(src)
666 if entry is not None:
667 _size, fp = entry
668 fp.seek(0)
608 669 yield fp
609
610 def __exit__(self, *args, **kwars):
611 """discard all backups"""
612 for tmp in self._copies.values():
613 util.tryunlink(tmp)
614 util.tryrmdir(self._dst_dir)
615 self._copies = None
616 self._dst_dir = None
670 else:
671 if self._copies is None:
672 actual_path = src
673 else:
674 actual_path = self._copies.get(src, src)
675 with open(actual_path, 'rb') as fp:
676 yield fp
617 677
618 678
619 679 def _makemap(repo):
General Comments 0
You need to be logged in to leave comments. Login now