##// END OF EJS Templates
randomaccessfile: drop explicit passing of file description...
marmoute -
r51920:3314c41c default
parent child Browse files
Show More
@@ -1,159 +1,155 b''
1 # Copyright Mercurial Contributors
1 # Copyright Mercurial Contributors
2 #
2 #
3 # This software may be used and distributed according to the terms of the
3 # This software may be used and distributed according to the terms of the
4 # GNU General Public License version 2 or any later version.
4 # GNU General Public License version 2 or any later version.
5
5
6 import contextlib
6 import contextlib
7
7
8 from ..i18n import _
8 from ..i18n import _
9 from .. import (
9 from .. import (
10 error,
10 error,
11 util,
11 util,
12 )
12 )
13
13
14
14
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
15 _MAX_CACHED_CHUNK_SIZE = 1048576 # 1 MiB
16
16
17 PARTIAL_READ_MSG = _(
17 PARTIAL_READ_MSG = _(
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
18 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
19 )
19 )
20
20
21
21
22 def _is_power_of_two(n):
22 def _is_power_of_two(n):
23 return (n & (n - 1) == 0) and n != 0
23 return (n & (n - 1) == 0) and n != 0
24
24
25
25
26 class randomaccessfile:
26 class randomaccessfile:
27 """Accessing arbitrary chunks of data within a file, with some caching"""
27 """Accessing arbitrary chunks of data within a file, with some caching"""
28
28
29 def __init__(
29 def __init__(
30 self,
30 self,
31 opener,
31 opener,
32 filename,
32 filename,
33 default_cached_chunk_size,
33 default_cached_chunk_size,
34 initial_cache=None,
34 initial_cache=None,
35 ):
35 ):
36 # Required by bitwise manipulation below
36 # Required by bitwise manipulation below
37 assert _is_power_of_two(default_cached_chunk_size)
37 assert _is_power_of_two(default_cached_chunk_size)
38
38
39 self.opener = opener
39 self.opener = opener
40 self.filename = filename
40 self.filename = filename
41 self.default_cached_chunk_size = default_cached_chunk_size
41 self.default_cached_chunk_size = default_cached_chunk_size
42 self.writing_handle = None # This is set from revlog.py
42 self.writing_handle = None # This is set from revlog.py
43 self.reading_handle = None
43 self.reading_handle = None
44 self._cached_chunk = b''
44 self._cached_chunk = b''
45 self._cached_chunk_position = 0 # Offset from the start of the file
45 self._cached_chunk_position = 0 # Offset from the start of the file
46 if initial_cache:
46 if initial_cache:
47 self._cached_chunk_position, self._cached_chunk = initial_cache
47 self._cached_chunk_position, self._cached_chunk = initial_cache
48
48
49 def clear_cache(self):
49 def clear_cache(self):
50 self._cached_chunk = b''
50 self._cached_chunk = b''
51 self._cached_chunk_position = 0
51 self._cached_chunk_position = 0
52
52
53 def _open(self, mode=b'r'):
53 def _open(self, mode=b'r'):
54 """Return a file object"""
54 """Return a file object"""
55 return self.opener(self.filename, mode=mode)
55 return self.opener(self.filename, mode=mode)
56
56
57 @contextlib.contextmanager
57 @contextlib.contextmanager
58 def _open_read(self, existing_file_obj=None):
58 def _read_handle(self):
59 """File object suitable for reading data"""
59 """File object suitable for reading data"""
60 # Use explicit file handle, if given.
61 if existing_file_obj is not None:
62 yield existing_file_obj
63
64 # Use a file handle being actively used for writes, if available.
60 # Use a file handle being actively used for writes, if available.
65 # There is some danger to doing this because reads will seek the
61 # There is some danger to doing this because reads will seek the
66 # file. However, revlog._writeentry performs a SEEK_END before all
62 # file. However, revlog._writeentry performs a SEEK_END before all
67 # writes, so we should be safe.
63 # writes, so we should be safe.
68 elif self.writing_handle:
64 if self.writing_handle:
69 yield self.writing_handle
65 yield self.writing_handle
70
66
71 elif self.reading_handle:
67 elif self.reading_handle:
72 yield self.reading_handle
68 yield self.reading_handle
73
69
74 # Otherwise open a new file handle.
70 # Otherwise open a new file handle.
75 else:
71 else:
76 with self._open() as fp:
72 with self._open() as fp:
77 yield fp
73 yield fp
78
74
79 @contextlib.contextmanager
75 @contextlib.contextmanager
80 def reading(self):
76 def reading(self):
81 """Context manager that keeps the file open for reading"""
77 """Context manager that keeps the file open for reading"""
82 if (
78 if (
83 self.reading_handle is None
79 self.reading_handle is None
84 and self.writing_handle is None
80 and self.writing_handle is None
85 and self.filename is not None
81 and self.filename is not None
86 ):
82 ):
87 with self._open() as fp:
83 with self._open() as fp:
88 self.reading_handle = fp
84 self.reading_handle = fp
89 try:
85 try:
90 yield
86 yield
91 finally:
87 finally:
92 self.reading_handle = None
88 self.reading_handle = None
93 else:
89 else:
94 yield
90 yield
95
91
96 def read_chunk(self, offset, length, existing_file_obj=None):
92 def read_chunk(self, offset, length):
97 """Read a chunk of bytes from the file.
93 """Read a chunk of bytes from the file.
98
94
99 Accepts an absolute offset, length to read, and an optional existing
95 Accepts an absolute offset and a length to read. Requests that fall
100 file handle to read from.
96 inside the cached chunk are served from the cache.
101
97
102 If an existing file handle is passed, it will be seeked and the
98 Otherwise the handle provided by `_read_handle` will be seeked and the
103 original seek position will NOT be restored.
99 original seek position will NOT be restored.
104
100
105 Returns a str or buffer of raw byte data.
101 Returns a str or buffer of raw byte data.
106
102
107 Raises if the requested number of bytes could not be read.
103 Raises if the requested number of bytes could not be read.
108 """
104 """
109 end = offset + length
105 end = offset + length
110 cache_start = self._cached_chunk_position
106 cache_start = self._cached_chunk_position
111 cache_end = cache_start + len(self._cached_chunk)
107 cache_end = cache_start + len(self._cached_chunk)
112 # Is the requested chunk within the cache?
108 # Is the requested chunk within the cache?
113 if cache_start <= offset and end <= cache_end:
109 if cache_start <= offset and end <= cache_end:
114 if cache_start == offset and end == cache_end:
110 if cache_start == offset and end == cache_end:
115 return self._cached_chunk # avoid a copy
111 return self._cached_chunk # avoid a copy
116 relative_start = offset - cache_start
112 relative_start = offset - cache_start
117 return util.buffer(self._cached_chunk, relative_start, length)
113 return util.buffer(self._cached_chunk, relative_start, length)
118
114
119 return self._read_and_update_cache(offset, length, existing_file_obj)
115 return self._read_and_update_cache(offset, length)
120
116
121 def _read_and_update_cache(self, offset, length, existing_file_obj=None):
117 def _read_and_update_cache(self, offset, length):
122 # Cache data both forward and backward around the requested
118 # Cache data both forward and backward around the requested
123 # data, in a fixed size window. This helps speed up operations
119 # data, in a fixed size window. This helps speed up operations
124 # involving reading the revlog backwards.
120 # involving reading the revlog backwards.
125 real_offset = offset & ~(self.default_cached_chunk_size - 1)
121 real_offset = offset & ~(self.default_cached_chunk_size - 1)
126 real_length = (
122 real_length = (
127 (offset + length + self.default_cached_chunk_size)
123 (offset + length + self.default_cached_chunk_size)
128 & ~(self.default_cached_chunk_size - 1)
124 & ~(self.default_cached_chunk_size - 1)
129 ) - real_offset
125 ) - real_offset
130 with self._open_read(existing_file_obj) as file_obj:
126 with self._read_handle() as file_obj:
131 file_obj.seek(real_offset)
127 file_obj.seek(real_offset)
132 data = file_obj.read(real_length)
128 data = file_obj.read(real_length)
133
129
134 self._add_cached_chunk(real_offset, data)
130 self._add_cached_chunk(real_offset, data)
135
131
136 relative_offset = offset - real_offset
132 relative_offset = offset - real_offset
137 got = len(data) - relative_offset
133 got = len(data) - relative_offset
138 if got < length:
134 if got < length:
139 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
135 message = PARTIAL_READ_MSG % (self.filename, length, offset, got)
140 raise error.RevlogError(message)
136 raise error.RevlogError(message)
141
137
142 if offset != real_offset or real_length != length:
138 if offset != real_offset or real_length != length:
143 return util.buffer(data, relative_offset, length)
139 return util.buffer(data, relative_offset, length)
144 return data
140 return data
145
141
146 def _add_cached_chunk(self, offset, data):
142 def _add_cached_chunk(self, offset, data):
147 """Add to or replace the cached data chunk.
143 """Add to or replace the cached data chunk.
148
144
149 Accepts an absolute offset and the data that is at that location.
145 Accepts an absolute offset and the data that is at that location.
150 """
146 """
151 if (
147 if (
152 self._cached_chunk_position + len(self._cached_chunk) == offset
148 self._cached_chunk_position + len(self._cached_chunk) == offset
153 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
149 and len(self._cached_chunk) + len(data) < _MAX_CACHED_CHUNK_SIZE
154 ):
150 ):
155 # add to existing cache
151 # add to existing cache
156 self._cached_chunk += data
152 self._cached_chunk += data
157 else:
153 else:
158 self._cached_chunk = data
154 self._cached_chunk = data
159 self._cached_chunk_position = offset
155 self._cached_chunk_position = offset
General Comments 0
You need to be logged in to leave comments. Login now