revlog: store sidedata in their own file...
marmoute
r48181:e6292eb3 default
@@ -1162,13 +1162,13 b' coreconfigitem('
1162 1162 # rewriting sidedata.
1163 1163 # * introduce a proper solution to reduce the number of filelog related files.
1164 1164 # * use caching for reading sidedata (similar to what we do for data).
1165 # * no longer set offset=0 if sidedata_size=0 (simplify cutoff computation).
1165 1166 # * Improvement to consider
1166 1167 # - avoid compression header in chunk using the default compression?
1167 1168 # - forbid "inline" compression mode entirely?
1168 1169 # - split the data offset and flag field (the 2 bytes saved are mostly trouble)
1169 1170 # - keep track of uncompressed -chunk- size (to preallocate memory better)
1170 1171 # - keep track of chain base or size (probably not that useful anymore)
1171 # - store data and sidedata in different files
1172 1172 coreconfigitem(
1173 1173 b'experimental',
1174 1174 b'revlogv2',
@@ -1,4 +1,5 b''
1 1 # revlog.py - storage back-end for mercurial
2 # coding: utf8
2 3 #
3 4 # Copyright 2005-2007 Olivia Mackall <olivia@selenic.com>
4 5 #
@@ -260,6 +261,11 b' PARTIAL_READ_MSG = _('
260 261 b'partial read of revlog %s; expected %d bytes from offset %d, got %d'
261 262 )
262 263
264 FILE_TOO_SHORT_MSG = _(
265 b'cannot read from revlog %s;'
266 b' expected %d bytes from offset %d, data size is %d'
267 )
268
263 269
264 270 class revlog(object):
265 271 """
@@ -401,6 +407,7 b' class revlog(object):'
401 407 self._docket_file = None
402 408 self._indexfile = None
403 409 self._datafile = None
410 self._sidedatafile = None
404 411 self._nodemap_file = None
405 412 self.postfix = postfix
406 413 self._trypending = trypending
@@ -445,7 +452,7 b' class revlog(object):'
445 452 # custom flags.
446 453 self._flagprocessors = dict(flagutil.flagprocessors)
447 454
448 # 2-tuple of file handles being used for active writing.
455 # 3-tuple of file handles being used for active writing.
449 456 self._writinghandles = None
450 457 # prevent nesting of addgroup
451 458 self._adding_group = None
@@ -634,6 +641,7 b' class revlog(object):'
634 641
635 642 if self._docket is not None:
636 643 self._datafile = self._docket.data_filepath()
644 self._sidedatafile = self._docket.sidedata_filepath()
637 645 elif self.postfix is None:
638 646 self._datafile = b'%s.d' % self.radix
639 647 else:
@@ -803,9 +811,14 b' class revlog(object):'
803 811 with func() as fp:
804 812 yield fp
805 813
814 @contextlib.contextmanager
806 815 def _sidedatareadfp(self):
807 816 """file object suitable to read sidedata"""
808 return self._datareadfp()
817 if self._writinghandles:
818 yield self._writinghandles[2]
819 else:
820 with self.opener(self._sidedatafile) as fp:
821 yield fp
809 822
810 823 def tiprev(self):
811 824 return len(self.index) - 1
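For illustration, the pattern behind the new _sidedatareadfp is simply "reuse the active write handle if a write is in progress, otherwise open the file read-only". Below is a minimal standalone sketch of that pattern; the class name is hypothetical and plain open() stands in for the revlog opener and the docket-derived filename:

    import contextlib

    class SidedataReaderSketch(object):
        """Toy stand-in for the revlog handle bookkeeping (not the real class)."""

        def __init__(self, path):
            self._path = path
            # mirrors revlog._writinghandles: None, or an
            # (index, data, sidedata) triple while a write is in progress
            self._writinghandles = None

        @contextlib.contextmanager
        def _sidedatareadfp(self):
            """file object suitable to read sidedata"""
            if self._writinghandles:
                # reuse the sidedata write handle so reads observe bytes
                # appended through it in the current transaction
                yield self._writinghandles[2]
            else:
                with open(self._path, 'rb') as fp:
                    yield fp

The real method differs only in going through self.opener and self._sidedatafile, which now comes from the docket's sidedata_filepath().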
@@ -909,6 +922,23 b' class revlog(object):'
909 922 def start(self, rev):
910 923 return int(self.index[rev][0] >> 16)
911 924
925 def sidedata_cut_off(self, rev):
926 sd_cut_off = self.index[rev][8]
927 if sd_cut_off != 0:
928 return sd_cut_off
929 # This is some annoying dance, because entries without sidedata
930 # currently use 0 as their offset (instead of previous-offset +
931 # previous-size).
932 #
933 # We should reconsider this sidedata → 0 sidedata_offset policy.
934 # In the meantime, we need this.
935 while 0 <= rev:
936 e = self.index[rev]
937 if e[9] != 0:
938 return e[8] + e[9]
939 rev -= 1
940 return 0
941
912 942 def flags(self, rev):
913 943 return self.index[rev][0] & 0xFFFF
914 944
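As the comment in this hunk explains, revisions without sidedata record offset 0 rather than previous-offset + previous-size, so the cut-off cannot always be read off a single entry. A standalone sketch of the same backward walk over a toy index of (sidedata_offset, sidedata_size) pairs follows; the helper is hypothetical and the real index keeps these values at positions 8 and 9 of each entry:

    def sidedata_cut_off(index, rev):
        """Return the offset up to which the sidedata file is still needed
        when stripping `rev` and all later revisions.

        `index` is a toy list of (sidedata_offset, sidedata_size) pairs.
        Revisions without sidedata store offset 0, so when rev's offset is
        0 we walk backwards to the last revision with a non-empty sidedata
        chunk and use its offset + size as the cut-off.
        """
        offset, size = index[rev]
        if offset != 0:
            return offset
        while rev >= 0:
            offset, size = index[rev]
            if size != 0:
                return offset + size
            rev -= 1
        return 0

    # revisions 1 and 3 carry no sidedata, so their recorded offset is 0
    toy = [(0, 10), (0, 0), (10, 7), (0, 0)]
    assert sidedata_cut_off(toy, 3) == 17  # end of rev 2's chunk
    assert sidedata_cut_off(toy, 2) == 10  # rev 2's own offset
    assert sidedata_cut_off(toy, 1) == 10  # end of rev 0's chunk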
@@ -2074,11 +2104,19 b' class revlog(object):'
2074 2104
2075 2105 # XXX this need caching, as we do for data
2076 2106 with self._sidedatareadfp() as sdf:
2077 sdf.seek(sidedata_offset)
2107 if self._docket.sidedata_end < sidedata_offset + sidedata_size:
2108 filename = self._sidedatafile
2109 end = self._docket.sidedata_end
2110 offset = sidedata_offset
2111 length = sidedata_size
2112 m = FILE_TOO_SHORT_MSG % (filename, length, offset, end)
2113 raise error.RevlogError(m)
2114
2115 sdf.seek(sidedata_offset, os.SEEK_SET)
2078 2116 comp_segment = sdf.read(sidedata_size)
2079 2117
2080 2118 if len(comp_segment) < sidedata_size:
2081 filename = self._datafile
2119 filename = self._sidedatafile
2082 2120 length = sidedata_size
2083 2121 offset = sidedata_offset
2084 2122 got = len(comp_segment)
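The read path now checks the requested range against the docket's sidedata_end before seeking, and still verifies how many bytes actually came back. Here is a minimal sketch of that bounds-checked read against an ordinary file object; read_segment and ValueError are stand-ins for the real method and error.RevlogError, with the message text mirroring FILE_TOO_SHORT_MSG and PARTIAL_READ_MSG from this diff:

    import io
    import os

    def read_segment(fp, filename, valid_end, offset, size):
        """Read `size` bytes at `offset`, refusing to reach past `valid_end`."""
        if valid_end < offset + size:
            raise ValueError(
                'cannot read from revlog %s; expected %d bytes from offset %d,'
                ' data size is %d' % (filename, size, offset, valid_end)
            )
        fp.seek(offset, os.SEEK_SET)
        segment = fp.read(size)
        if len(segment) < size:
            raise ValueError(
                'partial read of revlog %s; expected %d bytes from offset %d,'
                ' got %d' % (filename, size, offset, len(segment))
            )
        return segment

    fp = io.BytesIO(b'0123456789')
    assert read_segment(fp, 'demo.sda', 10, 2, 5) == b'23456'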
@@ -2215,7 +2253,7 b' class revlog(object):'
2215 2253 if existing_handles:
2216 2254 # switched from inline to conventional reopen the index
2217 2255 ifh = self.__index_write_fp()
2218 self._writinghandles = (ifh, new_dfh)
2256 self._writinghandles = (ifh, new_dfh, None)
2219 2257 new_dfh = None
2220 2258 finally:
2221 2259 if new_dfh is not None:
@@ -2233,7 +2271,7 b' class revlog(object):'
2233 2271 if self._writinghandles is not None:
2234 2272 yield
2235 2273 else:
2236 ifh = dfh = None
2274 ifh = dfh = sdfh = None
2237 2275 try:
2238 2276 r = len(self)
2239 2277 # opening the data file.
@@ -2253,6 +2291,17 b' class revlog(object):'
2253 2291 raise
2254 2292 dfh = self._datafp(b"w+")
2255 2293 transaction.add(self._datafile, dsize)
2294 if self._sidedatafile is not None:
2295 try:
2296 sdfh = self.opener(self._sidedatafile, mode=b"r+")
2297 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2298 except IOError as inst:
2299 if inst.errno != errno.ENOENT:
2300 raise
2301 sdfh = self.opener(self._sidedatafile, mode=b"w+")
2302 transaction.add(
2303 self._sidedatafile, self._docket.sidedata_end
2304 )
2256 2305
2257 2306 # opening the index file.
2258 2307 isize = r * self.index.entry_size
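Opening the sidedata file for writing reuses the open-or-create dance already applied to the data file: try r+ first and fall back to w+ when the file does not exist yet, then position at the recorded end offset. A simplified sketch of that step follows, with plain open() instead of the vfs opener, a made-up helper name, and no transaction.add bookkeeping:

    import errno
    import os

    def open_sidedata_for_writing(path, sidedata_end):
        """Open `path` read-write, creating it if missing, positioned at sidedata_end."""
        try:
            fh = open(path, 'r+b')
        except IOError as inst:
            if inst.errno != errno.ENOENT:
                raise
            # first write ever: the sidedata file does not exist yet
            fh = open(path, 'w+b')
        fh.seek(sidedata_end, os.SEEK_SET)
        return fh

    fh = open_sidedata_for_writing('demo.sda', 0)
    fh.write(b'sidedata chunk')
    fh.close()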
@@ -2262,7 +2311,7 b' class revlog(object):'
2262 2311 else:
2263 2312 transaction.add(self._indexfile, isize)
2264 2313 # exposing all file handle for writing.
2265 self._writinghandles = (ifh, dfh)
2314 self._writinghandles = (ifh, dfh, sdfh)
2266 2315 yield
2267 2316 if self._docket is not None:
2268 2317 self._write_docket(transaction)
@@ -2270,6 +2319,8 b' class revlog(object):'
2270 2319 self._writinghandles = None
2271 2320 if dfh is not None:
2272 2321 dfh.close()
2322 if sdfh is not None:
2323 sdfh.close()
2273 2324 # closing the index file last to avoid exposing referent to
2274 2325 # potential unflushed data content.
2275 2326 if ifh is not None:
@@ -2513,7 +2564,8 b' class revlog(object):'
2513 2564 offset = self._get_data_offset(prev)
2514 2565
2515 2566 if self._concurrencychecker:
2516 ifh, dfh = self._writinghandles
2567 ifh, dfh, sdfh = self._writinghandles
2568 # XXX no checking for the sidedata file
2517 2569 if self._inline:
2518 2570 # offset is "as if" it were in the .d file, so we need to add on
2519 2571 # the size of the entry metadata.
@@ -2570,7 +2622,7 b' class revlog(object):'
2570 2622 if sidedata and self.hassidedata:
2571 2623 sidedata_compression_mode = COMP_MODE_PLAIN
2572 2624 serialized_sidedata = sidedatautil.serialize_sidedata(sidedata)
2573 sidedata_offset = offset + deltainfo.deltalen
2625 sidedata_offset = self._docket.sidedata_end
2574 2626 h, comp_sidedata = self.compress(serialized_sidedata)
2575 2627 if (
2576 2628 h != b'u'
@@ -2622,6 +2674,7 b' class revlog(object):'
2622 2674 link,
2623 2675 offset,
2624 2676 serialized_sidedata,
2677 sidedata_offset,
2625 2678 )
2626 2679
2627 2680 rawtext = btext[0]
@@ -2648,7 +2701,9 b' class revlog(object):'
2648 2701 else:
2649 2702 return self._docket.data_end
2650 2703
2651 def _writeentry(self, transaction, entry, data, link, offset, sidedata):
2704 def _writeentry(
2705 self, transaction, entry, data, link, offset, sidedata, sidedata_offset
2706 ):
2652 2707 # Files opened in a+ mode have inconsistent behavior on various
2653 2708 # platforms. Windows requires that a file positioning call be made
2654 2709 # when the file handle transitions between reads and writes. See
@@ -2664,7 +2719,7 b' class revlog(object):'
2664 2719 if self._writinghandles is None:
2665 2720 msg = b'adding revision outside `revlog._writing` context'
2666 2721 raise error.ProgrammingError(msg)
2667 ifh, dfh = self._writinghandles
2722 ifh, dfh, sdfh = self._writinghandles
2668 2723 if self._docket is None:
2669 2724 ifh.seek(0, os.SEEK_END)
2670 2725 else:
@@ -2674,16 +2729,20 b' class revlog(object):'
2674 2729 dfh.seek(0, os.SEEK_END)
2675 2730 else:
2676 2731 dfh.seek(self._docket.data_end, os.SEEK_SET)
2732 if sdfh:
2733 sdfh.seek(self._docket.sidedata_end, os.SEEK_SET)
2677 2734
2678 2735 curr = len(self) - 1
2679 2736 if not self._inline:
2680 2737 transaction.add(self._datafile, offset)
2738 if self._sidedatafile:
2739 transaction.add(self._sidedatafile, sidedata_offset)
2681 2740 transaction.add(self._indexfile, curr * len(entry))
2682 2741 if data[0]:
2683 2742 dfh.write(data[0])
2684 2743 dfh.write(data[1])
2685 2744 if sidedata:
2686 dfh.write(sidedata)
2745 sdfh.write(sidedata)
2687 2746 ifh.write(entry)
2688 2747 else:
2689 2748 offset += curr * self.index.entry_size
@@ -2691,12 +2750,12 b' class revlog(object):'
2691 2750 ifh.write(entry)
2692 2751 ifh.write(data[0])
2693 2752 ifh.write(data[1])
2694 if sidedata:
2695 ifh.write(sidedata)
2753 assert not sidedata
2696 2754 self._enforceinlinesize(transaction)
2697 2755 if self._docket is not None:
2698 2756 self._docket.index_end = self._writinghandles[0].tell()
2699 2757 self._docket.data_end = self._writinghandles[1].tell()
2758 self._docket.sidedata_end = self._writinghandles[2].tell()
2700 2759
2701 2760 nodemaputil.setup_persistent_nodemap(transaction, self)
2702 2761
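After this change a non-inline revision is spread over three files: the fixed-size entry goes to the index, the delta data to the .dat file, and the optional sidedata chunk to the new .sda file, each appended at the end offset tracked by the docket. A toy sketch of that split append follows, with a plain dict standing in for the RevlogDocket, a single byte string for the delta data, and none of the transaction bookkeeping:

    import io
    import os

    def write_entry(ifh, dfh, sdfh, docket, entry, data, sidedata):
        """Append one revision across the index, data and sidedata files."""
        ifh.seek(docket['index_end'], os.SEEK_SET)
        dfh.seek(docket['data_end'], os.SEEK_SET)
        if sdfh is not None:
            sdfh.seek(docket['sidedata_end'], os.SEEK_SET)

        dfh.write(data)
        if sidedata:
            sdfh.write(sidedata)
        ifh.write(entry)

        # record the new end offsets so the next append knows where to start
        docket['index_end'] = ifh.tell()
        docket['data_end'] = dfh.tell()
        if sdfh is not None:
            docket['sidedata_end'] = sdfh.tell()

    docket = {'index_end': 0, 'data_end': 0, 'sidedata_end': 0}
    ifh, dfh, sdfh = io.BytesIO(), io.BytesIO(), io.BytesIO()
    write_entry(ifh, dfh, sdfh, docket, b'<entry>', b'<delta>', b'<sidedata>')
    assert docket['sidedata_end'] == len(b'<sidedata>')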
@@ -2866,12 +2925,17 b' class revlog(object):'
2866 2925 else:
2867 2926 end = data_end + (rev * self.index.entry_size)
2868 2927
2928 if self._sidedatafile:
2929 sidedata_end = self.sidedata_cut_off(rev)
2930 transaction.add(self._sidedatafile, sidedata_end)
2931
2869 2932 transaction.add(self._indexfile, end)
2870 2933 if self._docket is not None:
2871 2934 # XXX we could leverage the docket while stripping. However it is
2872 2935 # not powerful enough at the time of this comment
2873 2936 self._docket.index_end = end
2874 2937 self._docket.data_end = data_end
2938 self._docket.sidedata_end = sidedata_end
2875 2939 self._docket.write(transaction, stripping=True)
2876 2940
2877 2941 # then reset internal state in memory to forget those revisions
@@ -3398,13 +3462,10 b' class revlog(object):'
3398 3462 new_entries = []
3399 3463 # append the new sidedata
3400 3464 with self._writing(transaction):
3401 ifh, dfh = self._writinghandles
3402 if self._docket is not None:
3403 dfh.seek(self._docket.data_end, os.SEEK_SET)
3404 else:
3405 dfh.seek(0, os.SEEK_END)
3406
3407 current_offset = dfh.tell()
3465 ifh, dfh, sdfh = self._writinghandles
3466 dfh.seek(self._docket.sidedata_end, os.SEEK_SET)
3467
3468 current_offset = sdfh.tell()
3408 3469 for rev in range(startrev, endrev + 1):
3409 3470 entry = self.index[rev]
3410 3471 new_sidedata, flags = sidedatautil.run_sidedata_helpers(
@@ -3455,12 +3516,11 b' class revlog(object):'
3455 3516 )
3456 3517
3457 3518 # the sidedata computation might have moved the file cursors around
3458 dfh.seek(current_offset, os.SEEK_SET)
3459 dfh.write(serialized_sidedata)
3519 sdfh.seek(current_offset, os.SEEK_SET)
3520 sdfh.write(serialized_sidedata)
3460 3521 new_entries.append(entry_update)
3461 3522 current_offset += len(serialized_sidedata)
3462 if self._docket is not None:
3463 self._docket.data_end = dfh.tell()
3523 self._docket.sidedata_end = sdfh.tell()
3464 3524
3465 3525 # rewrite the new index entries
3466 3526 ifh.seek(startrev * self.index.entry_size)
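Rewriting sidedata for a range of revisions now appends the new chunks at the end of the .sda file instead of growing the data file, remembering each chunk's (offset, size) so the index entries can be rewritten afterwards. A rough standalone sketch of that append loop, assuming the chunks are already serialized and compressed (the helper name is hypothetical):

    import io
    import os

    def append_sidedata(sdfh, sidedata_end, chunks):
        """Append each chunk at the end of the sidedata file.

        Returns the (offset, size) pairs to store back into the index
        entries, plus the new end offset to record in the docket.
        """
        sdfh.seek(sidedata_end, os.SEEK_SET)
        current_offset = sdfh.tell()
        locations = []
        for chunk in chunks:
            # computing sidedata may move file cursors around, hence the
            # explicit re-seek before every write in the real code
            sdfh.seek(current_offset, os.SEEK_SET)
            sdfh.write(chunk)
            locations.append((current_offset, len(chunk)))
            current_offset += len(chunk)
        return locations, current_offset

    sdfh = io.BytesIO(b'old-sidedata')
    locations, new_end = append_sidedata(sdfh, 12, [b'aaa', b'bbbb'])
    assert locations == [(12, 3), (15, 4)] and new_end == 19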
@@ -90,12 +90,15 b' if stable_docket_file:'
90 90 # | revlog index header.
91 91 # * 1 bytes: size of index uuid
92 92 # * 1 bytes: size of data uuid
93 # * 1 bytes: size of sidedata uuid
93 94 # * 8 bytes: size of index-data
94 95 # * 8 bytes: pending size of index-data
95 96 # * 8 bytes: size of data
97 # * 8 bytes: size of sidedata
96 98 # * 8 bytes: pending size of data
99 # * 8 bytes: pending size of sidedata
97 100 # * 1 bytes: default compression header
98 S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBLLLLc')
101 S_HEADER = struct.Struct(constants.INDEX_HEADER_FMT + b'BBBLLLLLLc')
99 102
100 103
101 104 class RevlogDocket(object):
@@ -108,10 +111,13 b' class RevlogDocket(object):'
108 111 version_header=None,
109 112 index_uuid=None,
110 113 data_uuid=None,
114 sidedata_uuid=None,
111 115 index_end=0,
112 116 pending_index_end=0,
113 117 data_end=0,
114 118 pending_data_end=0,
119 sidedata_end=0,
120 pending_sidedata_end=0,
115 121 default_compression_header=None,
116 122 ):
117 123 self._version_header = version_header
@@ -122,19 +128,25 b' class RevlogDocket(object):'
122 128 self._opener = revlog.opener
123 129 self._index_uuid = index_uuid
124 130 self._data_uuid = data_uuid
131 self._sidedata_uuid = sidedata_uuid
125 132 # these asserts should be True as long as we have a single index filename
126 133 assert index_end <= pending_index_end
127 134 assert data_end <= pending_data_end
135 assert sidedata_end <= pending_sidedata_end
128 136 self._initial_index_end = index_end
129 137 self._pending_index_end = pending_index_end
130 138 self._initial_data_end = data_end
131 139 self._pending_data_end = pending_data_end
140 self._initial_sidedata_end = sidedata_end
141 self._pending_sidedata_end = pending_sidedata_end
132 142 if use_pending:
133 143 self._index_end = self._pending_index_end
134 144 self._data_end = self._pending_data_end
145 self._sidedata_end = self._pending_sidedata_end
135 146 else:
136 147 self._index_end = self._initial_index_end
137 148 self._data_end = self._initial_data_end
149 self._sidedata_end = self._initial_sidedata_end
138 150 self.default_compression_header = default_compression_header
139 151
140 152 def index_filepath(self):
@@ -151,6 +163,13 b' class RevlogDocket(object):'
151 163 self._data_uuid = make_uid()
152 164 return b"%s-%s.dat" % (self._radix, self._data_uuid)
153 165
166 def sidedata_filepath(self):
167 """file path to the current sidedata file associated to this docket"""
168 # very simplistic version at first
169 if self._sidedata_uuid is None:
170 self._sidedata_uuid = make_uid()
171 return b"%s-%s.sda" % (self._radix, self._sidedata_uuid)
172
154 173 @property
155 174 def index_end(self):
156 175 return self._index_end
@@ -171,6 +190,16 b' class RevlogDocket(object):'
171 190 self._data_end = new_size
172 191 self._dirty = True
173 192
193 @property
194 def sidedata_end(self):
195 return self._sidedata_end
196
197 @sidedata_end.setter
198 def sidedata_end(self, new_size):
199 if new_size != self._sidedata_end:
200 self._sidedata_end = new_size
201 self._dirty = True
202
174 203 def write(self, transaction, pending=False, stripping=False):
175 204 """write the modification of disk if any
176 205
@@ -196,26 +225,33 b' class RevlogDocket(object):'
196 225 if pending:
197 226 official_index_end = self._initial_index_end
198 227 official_data_end = self._initial_data_end
228 official_sidedata_end = self._initial_sidedata_end
199 229 else:
200 230 official_index_end = self._index_end
201 231 official_data_end = self._data_end
232 official_sidedata_end = self._sidedata_end
202 233
203 234 # this assert should be True as long as we have a single index filename
204 235 assert official_data_end <= self._data_end
236 assert official_sidedata_end <= self._sidedata_end
205 237 data = (
206 238 self._version_header,
207 239 len(self._index_uuid),
208 240 len(self._data_uuid),
241 len(self._sidedata_uuid),
209 242 official_index_end,
210 243 self._index_end,
211 244 official_data_end,
212 245 self._data_end,
246 official_sidedata_end,
247 self._sidedata_end,
213 248 self.default_compression_header,
214 249 )
215 250 s = []
216 251 s.append(S_HEADER.pack(*data))
217 252 s.append(self._index_uuid)
218 253 s.append(self._data_uuid)
254 s.append(self._sidedata_uuid)
219 255 return b''.join(s)
220 256
221 257
@@ -262,6 +298,9 b' def parse_docket(revlog, data, use_pendi'
262 298 data_uuid_size = next(iheader)
263 299 data_uuid = get_data(data_uuid_size)
264 300
301 sidedata_uuid_size = next(iheader)
302 sidedata_uuid = get_data(sidedata_uuid_size)
303
265 304 index_size = next(iheader)
266 305
267 306 pending_index_size = next(iheader)
@@ -270,6 +309,10 b' def parse_docket(revlog, data, use_pendi'
270 309
271 310 pending_data_size = next(iheader)
272 311
312 sidedata_size = next(iheader)
313
314 pending_sidedata_size = next(iheader)
315
273 316 default_compression_header = next(iheader)
274 317
275 318 docket = RevlogDocket(
@@ -278,10 +321,13 b' def parse_docket(revlog, data, use_pendi'
278 321 version_header=version_header,
279 322 index_uuid=index_uuid,
280 323 data_uuid=data_uuid,
324 sidedata_uuid=sidedata_uuid,
281 325 index_end=index_size,
282 326 pending_index_end=pending_index_size,
283 327 data_end=data_size,
284 328 pending_data_end=pending_data_size,
329 sidedata_end=sidedata_size,
330 pending_sidedata_end=pending_sidedata_size,
285 331 default_compression_header=default_compression_header,
286 332 )
287 333 return docket
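The docket now carries a third uuid and two more end offsets, so the fixed-size header packs three one-byte uuid lengths and six offsets ahead of the compression header, with the variable-length uuids appended after it. The following self-contained pack/parse roundtrip is only a sketch of that layout: the b'>I' version field stands in for constants.INDEX_HEADER_FMT, the field widths follow Python's standard struct sizes rather than the documented on-disk ones, the function names are simplified stand-ins, and the uuid values are merely examples:

    import struct

    # stand-in layout: version, 3 uuid sizes, 6 end offsets, compression header
    S_HEADER = struct.Struct(b'>IBBBLLLLLLc')

    def serialize_docket(version, uuids, ends, comp_header):
        """Pack the (index, data, sidedata) uuids and their six end offsets."""
        index_uuid, data_uuid, sidedata_uuid = uuids
        header = S_HEADER.pack(
            version,
            len(index_uuid), len(data_uuid), len(sidedata_uuid),
            *ends,  # index, pending index, data, pending data,
                    # sidedata, pending sidedata
            comp_header,
        )
        return header + index_uuid + data_uuid + sidedata_uuid

    def parse_docket_sketch(blob):
        fields = S_HEADER.unpack_from(blob)
        version = fields[0]
        i_len, d_len, sd_len = fields[1:4]
        ends = fields[4:10]
        comp_header = fields[10]
        offset = S_HEADER.size
        index_uuid = blob[offset:offset + i_len]
        data_uuid = blob[offset + i_len:offset + i_len + d_len]
        start = offset + i_len + d_len
        sidedata_uuid = blob[start:start + sd_len]
        return version, (index_uuid, data_uuid, sidedata_uuid), ends, comp_header

    blob = serialize_docket(
        0xDEAD,
        (b'88698448', b'6b8ab34b', b'1335303a'),
        (128, 128, 4096, 4096, 512, 512),
        b'u',
    )
    assert parse_docket_sketch(blob)[1][2] == b'1335303a'
    assert parse_docket_sketch(blob)[2][4:] == (512, 512)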
@@ -395,6 +395,7 b' REVLOG_FILES_OTHER_EXT = ('
395 395 b'.dat',
396 396 b'.n',
397 397 b'.nd',
398 b'.sda',
398 399 b'd.tmpcensored',
399 400 )
400 401 # files that are "volatile" and might change between listing and streaming
@@ -86,9 +86,11 b' We should have have:'
86 86 - a data file
87 87
88 88 $ ls .hg/store/00changelog* .hg/store/00manifest*
89 .hg/store/00changelog-6b8ab34b.dat
90 .hg/store/00changelog-88698448.idx
89 .hg/store/00changelog-1335303a.sda
90 .hg/store/00changelog-6b8ab34b.idx
91 .hg/store/00changelog-b875dfc5.dat
91 92 .hg/store/00changelog.i
92 .hg/store/00manifest-1335303a.dat
93 .hg/store/00manifest-b875dfc5.idx
93 .hg/store/00manifest-05a21d65.idx
94 .hg/store/00manifest-43c37dde.dat
95 .hg/store/00manifest-e2c9362a.sda
94 96 .hg/store/00manifest.i