Show More
@@ -1,159 +1,155 b'' | |||||
1 | # Copyright Mercurial Contributors |
|
1 | # Copyright Mercurial Contributors | |
2 | # |
|
2 | # | |
3 | # This software may be used and distributed according to the terms of the |
|
3 | # This software may be used and distributed according to the terms of the | |
4 | # GNU General Public License version 2 or any later version. |
|
4 | # GNU General Public License version 2 or any later version. | |
5 |
|
5 | |||
6 | import contextlib |
|
6 | import contextlib | |
7 |
|
7 | |||
8 | from ..i18n import _ |
|
8 | from ..i18n import _ | |
9 | from .. import ( |
|
9 | from .. import ( | |
10 | error, |
|
10 | error, | |
11 | util, |
|
11 | util, | |
12 | ) |
|
12 | ) | |
13 |
|
13 | |||
14 |
|
14 | |||
15 | _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB |
|
15 | _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB | |
16 |
|
16 | |||
17 | PARTIAL_READ_MSG = _( |
|
17 | PARTIAL_READ_MSG = _( | |
18 | b'partial read of revlog %s; expected %d bytes from offset %d, got %d' |
|
18 | b'partial read of revlog %s; expected %d bytes from offset %d, got %d' | |
19 | ) |
|
19 | ) | |
20 |
|
20 | |||
21 |
|
21 | |||
22 | def _is_power_of_two(n): |
|
22 | def _is_power_of_two(n): | |
23 | return (n & (n - 1) == 0) and n != 0 |
|
23 | return (n & (n - 1) == 0) and n != 0 | |
24 |
|
24 | |||
25 |
|
25 | |||
26 | class randomaccessfile: |
|
26 | class randomaccessfile: | |
27 | """Accessing arbitrary chunks of data within a file, with some caching""" |

27 | """Accessing arbitrary chunks of data within a file, with some caching""" | |
28 |
|
28 | |||
29 | def __init__( |
|
29 | def __init__( | |
30 | self, |
|
30 | self, | |
31 | opener, |
|
31 | opener, | |
32 | filename, |
|
32 | filename, | |
33 | default_cached_chunk_size, |
|
33 | default_cached_chunk_size, | |
34 | initial_cache=None, |
|
34 | initial_cache=None, | |
35 | ): |
|
35 | ): | |
36 | # Required by bitwise manipulation below |
|
36 | # Required by bitwise manipulation below | |
37 | assert _is_power_of_two(default_cached_chunk_size) |
|
37 | assert _is_power_of_two(default_cached_chunk_size) | |
38 |
|
38 | |||
39 | self.opener = opener |
|
39 | self.opener = opener | |
40 | self.filename = filename |
|
40 | self.filename = filename | |
41 | self.default_cached_chunk_size = default_cached_chunk_size |
|
41 | self.default_cached_chunk_size = default_cached_chunk_size | |
42 | self.writing_handle = None # This is set from revlog.py |
|
42 | self.writing_handle = None # This is set from revlog.py | |
43 | self.reading_handle = None |
|
43 | self.reading_handle = None | |
44 | self._cached_chunk = b'' |
|
44 | self._cached_chunk = b'' | |
45 | self._cached_chunk_position = 0 # Offset from the start of the file |
|
45 | self._cached_chunk_position = 0 # Offset from the start of the file | |
46 | if initial_cache: |
|
46 | if initial_cache: | |
47 | self._cached_chunk_position, self._cached_chunk = initial_cache |
|
47 | self._cached_chunk_position, self._cached_chunk = initial_cache | |
48 |
|
48 | |||
49 | def clear_cache(self): |
|
49 | def clear_cache(self): | |
50 | self._cached_chunk = b'' |
|
50 | self._cached_chunk = b'' | |
51 | self._cached_chunk_position = 0 |
|
51 | self._cached_chunk_position = 0 | |
52 |
|
52 | |||
53 | def _open(self, mode=b'r'): |
|
53 | def _open(self, mode=b'r'): | |
54 | """Return a file object""" |
|
54 | """Return a file object""" | |
55 | return self.opener(self.filename, mode=mode) |
|
55 | return self.opener(self.filename, mode=mode) | |
56 |
|
56 | |||
57 | @contextlib.contextmanager |
|
57 | @contextlib.contextmanager | |
58 | def _open_read(self, existing_file_obj=None): |
|
58 | def _read_handle(self): | |
59 | """File object suitable for reading data""" |
|
59 | """File object suitable for reading data""" | |
60 | # Use explicit file handle, if given. |
|
|||
61 | if existing_file_obj is not None: |
|
|||
62 | yield existing_file_obj |
|
|||
63 |
|
||||
64 | # Use a file handle being actively used for writes, if available. |
|
60 | # Use a file handle being actively used for writes, if available. | |
65 | # There is some danger to doing this because reads will seek the |
|
61 | # There is some danger to doing this because reads will seek the | |
66 | # file. However, revlog._writeentry performs a SEEK_END before all |
|
62 | # file. However, revlog._writeentry performs a SEEK_END before all | |
67 | # writes, so we should be safe. |
|
63 | # writes, so we should be safe. | |
68 |
|
|
64 | if self.writing_handle: | |
69 | yield self.writing_handle |
|
65 | yield self.writing_handle | |
70 |
|
66 | |||
71 | elif self.reading_handle: |
|
67 | elif self.reading_handle: | |
72 | yield self.reading_handle |
|
68 | yield self.reading_handle | |
73 |
|
69 | |||
74 | # Otherwise open a new file handle. |
|
70 | # Otherwise open a new file handle. | |
75 | else: |
|
71 | else: | |
76 | with self._open() as fp: |
|
72 | with self._open() as fp: | |
77 | yield fp |
|
73 | yield fp | |
78 |
|
74 | |||
79 | @contextlib.contextmanager |
|
75 | @contextlib.contextmanager | |
80 | def reading(self): |
|
76 | def reading(self): | |
81 | """Context manager that keeps the file open for reading""" |
|
77 | """Context manager that keeps the file open for reading""" | |
82 | if ( |
|
78 | if ( | |
83 | self.reading_handle is None |
|
79 | self.reading_handle is None | |
84 | and self.writing_handle is None |
|
80 | and self.writing_handle is None | |
85 | and self.filename is not None |
|
81 | and self.filename is not None | |
86 | ): |
|
82 | ): | |
87 | with self._open() as fp: |
|
83 | with self._open() as fp: | |
88 | self.reading_handle = fp |
|
84 | self.reading_handle = fp | |
89 | try: |
|
85 | try: | |
90 | yield |
|
86 | yield | |
91 | finally: |
|
87 | finally: | |
92 | self.reading_handle = None |
|
88 | self.reading_handle = None | |
93 | else: |
|
89 | else: | |
94 | yield |
|
90 | yield | |
95 |
|
91 | |||
96 | def read_chunk(self, offset, length, existing_file_obj=None): |
|
92 | def read_chunk(self, offset, length): | |
97 | """Read a chunk of bytes from the file. |
|
93 | """Read a chunk of bytes from the file. | |
98 |
|
94 | |||
99 | Accepts an absolute offset, length to read, and an optional existing |
|
95 | Accepts an absolute offset, length to read, and an optional existing | |
100 | file handle to read from. |
|
96 | file handle to read from. | |
101 |
|
97 | |||
102 | If an existing file handle is passed, it will be seeked and the |
|
98 | If an existing file handle is passed, it will be seeked and the | |
103 | original seek position will NOT be restored. |
|
99 | original seek position will NOT be restored. | |
104 |
|
100 | |||
105 | Returns a str or buffer of raw byte data. |
|
101 | Returns a str or buffer of raw byte data. | |
106 |
|
102 | |||
107 | Raises if the requested number of bytes could not be read. |
|
103 | Raises if the requested number of bytes could not be read. | |
108 | """ |
|
104 | """ | |
109 | end = offset + length |
|
105 | end = offset + length | |
110 | cache_start = self._cached_chunk_position |
|
106 | cache_start = self._cached_chunk_position | |
111 | cache_end = cache_start + len(self._cached_chunk) |
|
107 | cache_end = cache_start + len(self._cached_chunk) | |
112 | # Is the requested chunk within the cache? |
|
108 | # Is the requested chunk within the cache? | |
113 | if cache_start <= offset and end <= cache_end: |
|
109 | if cache_start <= offset and end <= cache_end: | |
114 | if cache_start == offset and end == cache_end: |
|
110 | if cache_start == offset and end == cache_end: | |
115 | return self._cached_chunk # avoid a copy |
|
111 | return self._cached_chunk # avoid a copy | |
116 | relative_start = offset - cache_start |
|
112 | relative_start = offset - cache_start | |
117 | return util.buffer(self._cached_chunk, relative_start, length) |
|
113 | return util.buffer(self._cached_chunk, relative_start, length) | |
118 |
|
114 | |||
119 | return self._read_and_update_cache(offset, length, existing_file_obj) |
|
115 | return self._read_and_update_cache(offset, length) | |
120 |
|
116 | |||
121 | def _read_and_update_cache(self, offset, length, existing_file_obj=None): |
|
117 | def _read_and_update_cache(self, offset, length): | |
122 | # Cache data both forward and backward around the requested |
|
118 | # Cache data both forward and backward around the requested | |
123 | # data, in a fixed size window. This helps speed up operations |
|
119 | # data, in a fixed size window. This helps speed up operations | |
124 | # involving reading the revlog backwards. |
|
120 | # involving reading the revlog backwards. | |
125 | real_offset = offset & ~(self.default_cached_chunk_size - 1) |
|
121 | real_offset = offset & ~(self.default_cached_chunk_size - 1) | |
126 | real_length = ( |
|
122 | real_length = ( | |
127 | (offset + length + self.default_cached_chunk_size) |
|
123 | (offset + length + self.default_cached_chunk_size) | |
128 | & ~(self.default_cached_chunk_size - 1) |
|
124 | & ~(self.default_cached_chunk_size - 1) | |
129 | ) - real_offset |
|
125 | ) - real_offset | |
130 | with self._open_read(existing_file_obj) as file_obj: |
|
126 | with self._read_handle() as file_obj: | |
131 | file_obj.seek(real_offset) |
|
127 | file_obj.seek(real_offset) | |
132 | data = file_obj.read(real_length) |
|
128 | data = file_obj.read(real_length) | |
133 |
|
129 | |||
134 | self._add_cached_chunk(real_offset, data) |
|
130 | self._add_cached_chunk(real_offset, data) | |
135 |
|
131 | |||
136 | relative_offset = offset - real_offset |
|
132 | relative_offset = offset - real_offset | |
137 | got = len(data) - relative_offset |
|
133 | got = len(data) - relative_offset | |
138 | if got < length: |
|
134 | if got < length: | |
139 | message = PARTIAL_READ_MSG % (self.filename, length, offset, got) |
|
135 | message = PARTIAL_READ_MSG % (self.filename, length, offset, got) | |
140 | raise error.RevlogError(message) |
|
136 | raise error.RevlogError(message) | |
141 |
|
137 | |||
142 | if offset != real_offset or real_length != length: |
|
138 | if offset != real_offset or real_length != length: | |
143 | return util.buffer(data, relative_offset, length) |
|
139 | return util.buffer(data, relative_offset, length) | |
144 | return data |
|
140 | return data | |
145 |
|
141 | |||
146 | def _add_cached_chunk(self, offset, data): |
|
142 | def _add_cached_chunk(self, offset, data): | |
147 | """Add to or replace the cached data chunk. |
|
143 | """Add to or replace the cached data chunk. | |
148 |
|
144 | |||
149 | Accepts an absolute offset and the data that is at that location. |
|
145 | Accepts an absolute offset and the data that is at that location. | |
150 | """ |
|
146 | """ | |
151 | if ( |
|
147 | if ( | |
152 | self._cached_chunk_position + len(self._cached_chunk) == offset |
|
148 | self._cached_chunk_position + len(self._cached_chunk) == offset | |
153 | and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE |
|
149 | and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE | |
154 | ): |
|
150 | ): | |
155 | # add to existing cache |
|
151 | # add to existing cache | |
156 | self._cached_chunk += data |
|
152 | self._cached_chunk += data | |
157 | else: |
|
153 | else: | |
158 | self._cached_chunk = data |
|
154 | self._cached_chunk = data | |
159 | self._cached_chunk_position = offset |
|
155 | self._cached_chunk_position = offset |
General Comments 0
You need to be logged in to leave comments.
Login now