Show More
@@ -0,0 +1,138 b'' | |||
|
1 | # Copyright Mercurial Contributors | |
|
2 | # | |
|
3 | # This software may be used and distributed according to the terms of the | |
|
4 | # GNU General Public License version 2 or any later version. | |
|
5 | ||
|
6 | import contextlib | |
|
7 | ||
|
8 | from ..i18n import _ | |
|
9 | from .. import ( | |
|
10 | error, | |
|
11 | util, | |
|
12 | ) | |
|
13 | ||
|
14 | ||
|
# Upper bound on the in-memory cached chunk: _add_cached_chunk only *extends*
# the existing cache while the combined size stays below this limit.
_MAX_CACHED_CHUNK_SIZE = 1048576  # 1 MiB

# Error message raised when fewer bytes than requested could be read from a
# revlog file (also referenced from revlog.py for sidedata reads).
PARTIAL_READ_MSG = _(
    b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
)
|
20 | ||
|
21 | ||
|
22 | def _is_power_of_two(n): | |
|
23 | return (n & (n - 1) == 0) and n != 0 | |
|
24 | ||
|
25 | ||
|
class randomaccessfile(object):
    """Accessing arbitrary chunks of data within a file, with some caching.

    A single contiguous chunk of the file is kept in memory; reads that
    fall entirely inside it are served without touching the file.
    """

    def __init__(
        self,
        opener,
        filename,
        default_cached_chunk_size,
        initial_cache=None,
    ):
        """Create a cached reader for ``filename`` accessed via ``opener``.

        ``default_cached_chunk_size`` is the alignment/window size used when
        refilling the cache; it must be a power of two.
        ``initial_cache`` is an optional ``(offset, data)`` pair used to seed
        the cache.
        """
        # Required by bitwise manipulation below
        assert _is_power_of_two(default_cached_chunk_size)

        self.opener = opener
        self.filename = filename
        self.default_cached_chunk_size = default_cached_chunk_size
        self.writing_handle = None  # This is set from revlog.py
        # The cached chunk and its absolute position in the file. The empty
        # cache is represented as (b'', 0).
        self._cached_chunk = b''
        self._cached_chunk_position = 0  # Offset from the start of the file
        if initial_cache:
            self._cached_chunk_position, self._cached_chunk = initial_cache

    def clear_cache(self):
        """Drop the cached chunk (e.g. after the file was rewritten)."""
        self._cached_chunk = b''
        self._cached_chunk_position = 0

    def _open(self, mode=b'r'):
        """Return a file object"""
        return self.opener(self.filename, mode=mode)

    @contextlib.contextmanager
    def _open_read(self, existing_file_obj=None):
        """File object suitable for reading data"""
        # Use explicit file handle, if given.
        if existing_file_obj is not None:
            yield existing_file_obj

        # Use a file handle being actively used for writes, if available.
        # There is some danger to doing this because reads will seek the
        # file. However, revlog._writeentry performs a SEEK_END before all
        # writes, so we should be safe.
        elif self.writing_handle:
            yield self.writing_handle

        # Otherwise open a new file handle.
        else:
            with self._open() as fp:
                yield fp

    def read_chunk(self, offset, length, existing_file_obj=None):
        """Read a chunk of bytes from the file.

        Accepts an absolute offset, length to read, and an optional existing
        file handle to read from.

        If an existing file handle is passed, it will be seeked and the
        original seek position will NOT be restored.

        Returns a str or buffer of raw byte data.

        Raises if the requested number of bytes could not be read.
        """
        end = offset + length
        cache_start = self._cached_chunk_position
        cache_end = cache_start + len(self._cached_chunk)
        # Is the requested chunk within the cache?
        if cache_start <= offset and end <= cache_end:
            if cache_start == offset and end == cache_end:
                return self._cached_chunk  # avoid a copy
            # Partial hit: return a zero-copy view into the cached bytes.
            relative_start = offset - cache_start
            return util.buffer(self._cached_chunk, relative_start, length)

        return self._read_and_update_cache(offset, length, existing_file_obj)

    def _read_and_update_cache(self, offset, length, existing_file_obj=None):
        """Read ``length`` bytes at ``offset`` from disk, refilling the cache.

        Raises error.RevlogError on a short read; note the cache is updated
        with whatever data was obtained even in that case.
        """
        # Cache data both forward and backward around the requested
        # data, in a fixed size window. This helps speed up operations
        # involving reading the revlog backwards.
        real_offset = offset & ~(self.default_cached_chunk_size - 1)
        real_length = (
            (offset + length + self.default_cached_chunk_size)
            & ~(self.default_cached_chunk_size - 1)
        ) - real_offset
        with self._open_read(existing_file_obj) as file_obj:
            file_obj.seek(real_offset)
            data = file_obj.read(real_length)

        self._add_cached_chunk(real_offset, data)

        relative_offset = offset - real_offset
        got = len(data) - relative_offset
        if got < length:
            message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
            raise error.RevlogError(message)

        # Only slice when the aligned read does not coincide exactly with
        # the requested range.
        if offset != real_offset or real_length != length:
            return util.buffer(data, relative_offset, length)
        return data

    def _add_cached_chunk(self, offset, data):
        """Add to or replace the cached data chunk.

        Accepts an absolute offset and the data that is at that location.
        """
        if (
            self._cached_chunk_position + len(self._cached_chunk) == offset
            and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
        ):
            # add to existing cache
            self._cached_chunk += data
        else:
            self._cached_chunk = data
            self._cached_chunk_position = offset
@@ -454,6 +454,7 b' class changelog(revlog.revlog):' | |||
|
454 | 454 | self.opener = _delayopener( |
|
455 | 455 | self._realopener, self._indexfile, self._delaybuf |
|
456 | 456 | ) |
|
457 | self._segmentfile.opener = self.opener | |
|
457 | 458 | self._delayed = True |
|
458 | 459 | tr.addpending(b'cl-%i' % id(self), self._writepending) |
|
459 | 460 | tr.addfinalize(b'cl-%i' % id(self), self._finalize) |
@@ -462,6 +463,7 b' class changelog(revlog.revlog):' | |||
|
462 | 463 | """finalize index updates""" |
|
463 | 464 | self._delayed = False |
|
464 | 465 | self.opener = self._realopener |
|
466 | self._segmentfile.opener = self.opener | |
|
465 | 467 | # move redirected index data back into place |
|
466 | 468 | if self._docket is not None: |
|
467 | 469 | self._write_docket(tr) |
@@ -501,6 +503,7 b' class changelog(revlog.revlog):' | |||
|
501 | 503 | self._delaybuf = None |
|
502 | 504 | self._divert = True |
|
503 | 505 | self.opener = _divertopener(self._realopener, self._indexfile) |
|
506 | self._segmentfile.opener = self.opener | |
|
504 | 507 | |
|
505 | 508 | if self._divert: |
|
506 | 509 | return True |
@@ -86,6 +86,7 b' from .revlogutils import (' | |||
|
86 | 86 | docket as docketutil, |
|
87 | 87 | flagutil, |
|
88 | 88 | nodemap as nodemaputil, |
|
89 | randomaccessfile, | |
|
89 | 90 | revlogv0, |
|
90 | 91 | sidedata as sidedatautil, |
|
91 | 92 | ) |
@@ -125,7 +126,6 b" rustrevlog = policy.importrust('revlog')" | |||
|
125 | 126 | |
|
126 | 127 | # max size of revlog with inline data |
|
127 | 128 | _maxinline = 131072 |
|
128 | _chunksize = 1048576 | |
|
129 | 129 | |
|
130 | 130 | # Flag processors for REVIDX_ELLIPSIS. |
|
131 | 131 | def ellipsisreadprocessor(rl, text): |
@@ -232,10 +232,6 b' def parse_index_v1_mixed(data, inline):' | |||
|
232 | 232 | # signed integer) |
|
233 | 233 | _maxentrysize = 0x7FFFFFFF |
|
234 | 234 | |
|
235 | PARTIAL_READ_MSG = _( | |
|
236 | b'partial read of revlog %s; expected %d bytes from offset %d, got %d' | |
|
237 | ) | |
|
238 | ||
|
239 | 235 | FILE_TOO_SHORT_MSG = _( |
|
240 | 236 | b'cannot read from revlog %s;' |
|
241 | 237 | b' expected %d bytes from offset %d, data size is %d' |
@@ -605,7 +601,7 b' class revlog(object):' | |||
|
605 | 601 | self._parse_index = parse_index_v1_mixed |
|
606 | 602 | try: |
|
607 | 603 | d = self._parse_index(index_data, self._inline) |
|
608 |
index, |
|
|
604 | index, chunkcache = d | |
|
609 | 605 | use_nodemap = ( |
|
610 | 606 | not self._inline |
|
611 | 607 | and self._nodemap_file is not None |
@@ -626,9 +622,13 b' class revlog(object):' | |||
|
626 | 622 | raise error.RevlogError( |
|
627 | 623 | _(b"index %s is corrupted") % self.display_id |
|
628 | 624 | ) |
|
629 |
self.index |
|
|
630 | if not self._chunkcache: | |
|
631 |
self. |
|
|
625 | self.index = index | |
|
626 | self._segmentfile = randomaccessfile.randomaccessfile( | |
|
627 | self.opener, | |
|
628 | (self._indexfile if self._inline else self._datafile), | |
|
629 | self._chunkcachesize, | |
|
630 | chunkcache, | |
|
631 | ) | |
|
632 | 632 | # revnum -> (chain-length, sum-delta-length) |
|
633 | 633 | self._chaininfocache = util.lrucachedict(500) |
|
634 | 634 | # revlog header -> revlog compressor |
@@ -709,32 +709,6 b' class revlog(object):' | |||
|
709 | 709 | return self.opener(self._datafile, mode=mode) |
|
710 | 710 | |
|
711 | 711 | @contextlib.contextmanager |
|
712 | def _datareadfp(self, existingfp=None): | |
|
713 | """file object suitable to read data""" | |
|
714 | # Use explicit file handle, if given. | |
|
715 | if existingfp is not None: | |
|
716 | yield existingfp | |
|
717 | ||
|
718 | # Use a file handle being actively used for writes, if available. | |
|
719 | # There is some danger to doing this because reads will seek the | |
|
720 | # file. However, _writeentry() performs a SEEK_END before all writes, | |
|
721 | # so we should be safe. | |
|
722 | elif self._writinghandles: | |
|
723 | if self._inline: | |
|
724 | yield self._writinghandles[0] | |
|
725 | else: | |
|
726 | yield self._writinghandles[1] | |
|
727 | ||
|
728 | # Otherwise open a new file handle. | |
|
729 | else: | |
|
730 | if self._inline: | |
|
731 | func = self._indexfp | |
|
732 | else: | |
|
733 | func = self._datafp | |
|
734 | with func() as fp: | |
|
735 | yield fp | |
|
736 | ||
|
737 | @contextlib.contextmanager | |
|
738 | 712 | def _sidedatareadfp(self): |
|
739 | 713 | """file object suitable to read sidedata""" |
|
740 | 714 | if self._writinghandles: |
@@ -807,7 +781,7 b' class revlog(object):' | |||
|
807 | 781 | def clearcaches(self): |
|
808 | 782 | self._revisioncache = None |
|
809 | 783 | self._chainbasecache.clear() |
|
810 | self._chunkcache = (0, b'') | |
|
784 | self._segmentfile.clear_cache() | |
|
811 | 785 | self._pcache = {} |
|
812 | 786 | self._nodemap_docket = None |
|
813 | 787 | self.index.clearcaches() |
@@ -1629,85 +1603,6 b' class revlog(object):' | |||
|
1629 | 1603 | p1, p2 = self.parents(node) |
|
1630 | 1604 | return storageutil.hashrevisionsha1(text, p1, p2) != node |
|
1631 | 1605 | |
|
1632 | def _cachesegment(self, offset, data): | |
|
1633 | """Add a segment to the revlog cache. | |
|
1634 | ||
|
1635 | Accepts an absolute offset and the data that is at that location. | |
|
1636 | """ | |
|
1637 | o, d = self._chunkcache | |
|
1638 | # try to add to existing cache | |
|
1639 | if o + len(d) == offset and len(d) + len(data) < _chunksize: | |
|
1640 | self._chunkcache = o, d + data | |
|
1641 | else: | |
|
1642 | self._chunkcache = offset, data | |
|
1643 | ||
|
1644 | def _readsegment(self, offset, length, df=None): | |
|
1645 | """Load a segment of raw data from the revlog. | |
|
1646 | ||
|
1647 | Accepts an absolute offset, length to read, and an optional existing | |
|
1648 | file handle to read from. | |
|
1649 | ||
|
1650 | If an existing file handle is passed, it will be seeked and the | |
|
1651 | original seek position will NOT be restored. | |
|
1652 | ||
|
1653 | Returns a str or buffer of raw byte data. | |
|
1654 | ||
|
1655 | Raises if the requested number of bytes could not be read. | |
|
1656 | """ | |
|
1657 | # Cache data both forward and backward around the requested | |
|
1658 | # data, in a fixed size window. This helps speed up operations | |
|
1659 | # involving reading the revlog backwards. | |
|
1660 | cachesize = self._chunkcachesize | |
|
1661 | realoffset = offset & ~(cachesize - 1) | |
|
1662 | reallength = ( | |
|
1663 | (offset + length + cachesize) & ~(cachesize - 1) | |
|
1664 | ) - realoffset | |
|
1665 | with self._datareadfp(df) as df: | |
|
1666 | df.seek(realoffset) | |
|
1667 | d = df.read(reallength) | |
|
1668 | ||
|
1669 | self._cachesegment(realoffset, d) | |
|
1670 | if offset != realoffset or reallength != length: | |
|
1671 | startoffset = offset - realoffset | |
|
1672 | if len(d) - startoffset < length: | |
|
1673 | filename = self._indexfile if self._inline else self._datafile | |
|
1674 | got = len(d) - startoffset | |
|
1675 | m = PARTIAL_READ_MSG % (filename, length, offset, got) | |
|
1676 | raise error.RevlogError(m) | |
|
1677 | return util.buffer(d, startoffset, length) | |
|
1678 | ||
|
1679 | if len(d) < length: | |
|
1680 | filename = self._indexfile if self._inline else self._datafile | |
|
1681 | got = len(d) - startoffset | |
|
1682 | m = PARTIAL_READ_MSG % (filename, length, offset, got) | |
|
1683 | raise error.RevlogError(m) | |
|
1684 | ||
|
1685 | return d | |
|
1686 | ||
|
1687 | def _getsegment(self, offset, length, df=None): | |
|
1688 | """Obtain a segment of raw data from the revlog. | |
|
1689 | ||
|
1690 | Accepts an absolute offset, length of bytes to obtain, and an | |
|
1691 | optional file handle to the already-opened revlog. If the file | |
|
1692 | handle is used, it's original seek position will not be preserved. | |
|
1693 | ||
|
1694 | Requests for data may be returned from a cache. | |
|
1695 | ||
|
1696 | Returns a str or a buffer instance of raw byte data. | |
|
1697 | """ | |
|
1698 | o, d = self._chunkcache | |
|
1699 | l = len(d) | |
|
1700 | ||
|
1701 | # is it in the cache? | |
|
1702 | cachestart = offset - o | |
|
1703 | cacheend = cachestart + length | |
|
1704 | if cachestart >= 0 and cacheend <= l: | |
|
1705 | if cachestart == 0 and cacheend == l: | |
|
1706 | return d # avoid a copy | |
|
1707 | return util.buffer(d, cachestart, cacheend - cachestart) | |
|
1708 | ||
|
1709 | return self._readsegment(offset, length, df=df) | |
|
1710 | ||
|
1711 | 1606 | def _getsegmentforrevs(self, startrev, endrev, df=None): |
|
1712 | 1607 | """Obtain a segment of raw data corresponding to a range of revisions. |
|
1713 | 1608 | |
@@ -1740,7 +1635,7 b' class revlog(object):' | |||
|
1740 | 1635 | end += (endrev + 1) * self.index.entry_size |
|
1741 | 1636 | length = end - start |
|
1742 | 1637 | |
|
1743 |
return start, self._ |
|
|
1638 | return start, self._segmentfile.read_chunk(start, length, df) | |
|
1744 | 1639 | |
|
1745 | 1640 | def _chunk(self, rev, df=None): |
|
1746 | 1641 | """Obtain a single decompressed chunk for a revision. |
@@ -1832,10 +1727,6 b' class revlog(object):' | |||
|
1832 | 1727 | |
|
1833 | 1728 | return l |
|
1834 | 1729 | |
|
1835 | def _chunkclear(self): | |
|
1836 | """Clear the raw chunk cache.""" | |
|
1837 | self._chunkcache = (0, b'') | |
|
1838 | ||
|
1839 | 1730 | def deltaparent(self, rev): |
|
1840 | 1731 | """return deltaparent of the given revision""" |
|
1841 | 1732 | base = self.index[rev][3] |
@@ -2043,7 +1934,12 b' class revlog(object):' | |||
|
2043 | 1934 | length = sidedata_size |
|
2044 | 1935 | offset = sidedata_offset |
|
2045 | 1936 | got = len(comp_segment) |
|
2046 |
m = PARTIAL_READ_MSG % ( |
|
|
1937 | m = randomaccessfile.PARTIAL_READ_MSG % ( | |
|
1938 | filename, | |
|
1939 | length, | |
|
1940 | offset, | |
|
1941 | got, | |
|
1942 | ) | |
|
2047 | 1943 | raise error.RevlogError(m) |
|
2048 | 1944 | |
|
2049 | 1945 | comp = self.index[rev][11] |
@@ -2136,6 +2032,7 b' class revlog(object):' | |||
|
2136 | 2032 | # We can't use the cached file handle after close(). So prevent |
|
2137 | 2033 | # its usage. |
|
2138 | 2034 | self._writinghandles = None |
|
2035 | self._segmentfile.writing_handle = None | |
|
2139 | 2036 | |
|
2140 | 2037 | new_dfh = self._datafp(b'w+') |
|
2141 | 2038 | new_dfh.truncate(0) # drop any potentially existing data |
@@ -2171,12 +2068,17 b' class revlog(object):' | |||
|
2171 | 2068 | |
|
2172 | 2069 | tr.replace(self._indexfile, trindex * self.index.entry_size) |
|
2173 | 2070 | nodemaputil.setup_persistent_nodemap(tr, self) |
|
2174 | self._chunkclear() | |
|
2071 | self._segmentfile = randomaccessfile.randomaccessfile( | |
|
2072 | self.opener, | |
|
2073 | self._datafile, | |
|
2074 | self._chunkcachesize, | |
|
2075 | ) | |
|
2175 | 2076 | |
|
2176 | 2077 | if existing_handles: |
|
2177 | 2078 | # switched from inline to conventional reopen the index |
|
2178 | 2079 | ifh = self.__index_write_fp() |
|
2179 | 2080 | self._writinghandles = (ifh, new_dfh, None) |
|
2081 | self._segmentfile.writing_handle = new_dfh | |
|
2180 | 2082 | new_dfh = None |
|
2181 | 2083 | finally: |
|
2182 | 2084 | if new_dfh is not None: |
@@ -2235,11 +2137,13 b' class revlog(object):' | |||
|
2235 | 2137 | transaction.add(self._indexfile, isize) |
|
2236 | 2138 | # exposing all file handle for writing. |
|
2237 | 2139 | self._writinghandles = (ifh, dfh, sdfh) |
|
2140 | self._segmentfile.writing_handle = ifh if self._inline else dfh | |
|
2238 | 2141 | yield |
|
2239 | 2142 | if self._docket is not None: |
|
2240 | 2143 | self._write_docket(transaction) |
|
2241 | 2144 | finally: |
|
2242 | 2145 | self._writinghandles = None |
|
2146 | self._segmentfile.writing_handle = None | |
|
2243 | 2147 | if dfh is not None: |
|
2244 | 2148 | dfh.close() |
|
2245 | 2149 | if sdfh is not None: |
@@ -2873,7 +2777,7 b' class revlog(object):' | |||
|
2873 | 2777 | # then reset internal state in memory to forget those revisions |
|
2874 | 2778 | self._revisioncache = None |
|
2875 | 2779 | self._chaininfocache = util.lrucachedict(500) |
|
2876 |
self._ |
|
|
2780 | self._segmentfile.clear_cache() | |
|
2877 | 2781 | |
|
2878 | 2782 | del self.index[rev:-1] |
|
2879 | 2783 |
General Comments 0
You need to be logged in to leave comments.
Login now