##// END OF EJS Templates
stream: prefer keeping an open file handle to volatile file instead of copy...
marmoute -
r52912:a47f09da default
parent child Browse files
Show More
@@ -567,53 +567,113 b' def _filterfull(entry, copy, vfsmap):'
567
567
568
568
569 class VolatileManager:
569 class VolatileManager:
570 """Manage temporary backup of volatile file during stream clone
570 """Manage temporary backups of volatile files during stream clone.
571
571
572 This should be used as a Python context, the copies will be discarded when
572 This class will keep open file handles for the volatile files, writing the
573 exiting the context.
573 smaller ones on disk if the number of open file handles grow too much.
574
574
575 A copy can be done by calling the object on the real path (encoded full
575 This should be used as a Python context, the file handles and copies will
576 path)
576 be discarded when exiting the context.
577
577
578 The backup path can be retrieved using the __getitem__ protocol, obj[path].
578 The preservation can be done by calling the object on the real path
579 On file without backup, it will return the unmodified path. (equivalent to
579 (encoded full path).
580 `dict.get(x, x)`)
580
581 Valid filehandles for any file should be retrieved by calling `open(path)`.
581 """
582 """
582
583
584 # arbitrarily picked as "it seemed fine" and much higher than the current
585 # usage.
586 MAX_OPEN = 100
587
583 def __init__(self):
588 def __init__(self):
589 self._counter = 0
590 self._volatile_fps = None
584 self._copies = None
591 self._copies = None
585 self._dst_dir = None
592 self._dst_dir = None
586
593
587 def __enter__(self):
594 def __enter__(self):
588 if self._copies is not None:
595 if self._counter == 0:
589 msg = "Copies context already open"
596 assert self._volatile_fps is None
590 raise error.ProgrammingError(msg)
597 self._volatile_fps = {}
591 self._copies = {}
598 self._counter += 1
592 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
593 return self
599 return self
594
600
595 def __call__(self, src):
596 """create a backup of the file at src"""
597 prefix = os.path.basename(src)
598 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
599 os.close(fd)
600 self._copies[src] = dst
601 util.copyfiles(src, dst, hardlink=True)
602 return dst
603
604 @contextlib.contextmanager
605 def open(self, src):
606 actual_path = self._copies.get(src, src)
607 with open(actual_path, 'rb') as fp:
608 yield fp
609
610 def __exit__(self, *args, **kwars):
601 def __exit__(self, *args, **kwars):
611 """discard all backups"""
602 """discard all backups"""
603 self._counter -= 1
604 if self._counter == 0:
605 for _size, fp in self._volatile_fps.values():
606 fp.close()
607 self._volatile_fps = None
608 if self._copies is not None:
612 for tmp in self._copies.values():
609 for tmp in self._copies.values():
613 util.tryunlink(tmp)
610 util.tryunlink(tmp)
614 util.tryrmdir(self._dst_dir)
611 util.tryrmdir(self._dst_dir)
615 self._copies = None
612 self._copies = None
616 self._dst_dir = None
613 self._dst_dir = None
614 assert self._volatile_fps is None
615 assert self._copies is None
616 assert self._dst_dir is None
617
618 def _init_tmp_copies(self):
619 """prepare a temporary directory to save volatile files
620
621 This will be used as backup if we have too many files open"""
622 assert 0 < self._counter
623 assert self._copies is None
624 assert self._dst_dir is None
625 self._copies = {}
626 self._dst_dir = pycompat.mkdtemp(prefix=b'hg-clone-')
627
628 def _flush_some_on_disk(self):
629 """move some of the open files to tempory files on disk"""
630 if self._copies is None:
631 self._init_tmp_copies()
632 flush_count = self.MAX_OPEN // 2
633 for src, (size, fp) in sorted(self._volatile_fps.items())[:flush_count]:
634 prefix = os.path.basename(src)
635 fd, dst = pycompat.mkstemp(prefix=prefix, dir=self._dst_dir)
636 self._copies[src] = dst
637 os.close(fd)
638 # we no longer hardlink, but on the other hand we rarely do this,
639 # and we do it for the smallest file only and not at all in the
640 # common case.
641 with open(dst, 'wb') as bck:
642 fp.seek(0)
643 bck.write(fp.read())
644 del self._volatile_fps[src]
645 fp.close()
646
647 def _keep_one(self, src):
648 """preserve an open file handle for a given path"""
649 # store the file quickly to ensure we close it if any error happens
650 _, fp = self._volatile_fps[src] = (None, open(src, 'rb'))
651 fp.seek(0, os.SEEK_END)
652 size = fp.tell()
653 self._volatile_fps[src] = (size, fp)
654
655 def __call__(self, src):
656 """preserve the volatile file at src"""
657 assert 0 < self._counter
658 if len(self._volatile_fps) >= (self.MAX_OPEN - 1):
659 self._flush_some_on_disk()
660 self._keep_one(src)
661
662 @contextlib.contextmanager
663 def open(self, src):
664 assert 0 < self._counter
665 entry = self._volatile_fps.get(src)
666 if entry is not None:
667 _size, fp = entry
668 fp.seek(0)
669 yield fp
670 else:
671 if self._copies is None:
672 actual_path = src
673 else:
674 actual_path = self._copies.get(src, src)
675 with open(actual_path, 'rb') as fp:
676 yield fp
617
677
618
678
619 def _makemap(repo):
679 def _makemap(repo):
General Comments 0
You need to be logged in to leave comments. Login now