##// END OF EJS Templates
revlog: improve the robustness of the splitting process...
marmoute -
r51242:87f0155d stable
parent child Browse files
Show More
@@ -481,7 +481,7 b' class changelog(revlog.revlog):'
481 481 self._delaybuf = None
482 482 self._divert = False
483 483 # split when we're done
484 self._enforceinlinesize(tr)
484 self._enforceinlinesize(tr, side_write=False)
485 485
486 486 def _writepending(self, tr):
487 487 """create a file containing the unfinalized state for
@@ -512,9 +512,9 b' class changelog(revlog.revlog):'
512 512
513 513 return False
514 514
515 def _enforceinlinesize(self, tr):
515 def _enforceinlinesize(self, tr, side_write=True):
516 516 if not self._delayed:
517 revlog.revlog._enforceinlinesize(self, tr)
517 revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)
518 518
519 519 def read(self, nodeorrev):
520 520 """Obtain data from a parsed changelog revision.
@@ -25,7 +25,7 b' from .revlogutils import ('
25 25
26 26 @interfaceutil.implementer(repository.ifilestorage)
27 27 class filelog:
28 def __init__(self, opener, path):
28 def __init__(self, opener, path, try_split=False):
29 29 self._revlog = revlog.revlog(
30 30 opener,
31 31 # XXX should use the unencoded path
@@ -33,6 +33,7 b' class filelog:'
33 33 radix=b'/'.join((b'data', path)),
34 34 censorable=True,
35 35 canonical_parent_order=False, # see comment in revlog.py
36 try_split=try_split,
36 37 )
37 38 # Full name of the user visible file, relative to the repository root.
38 39 # Used by LFS.
@@ -256,8 +257,8 b' class filelog:'
256 257 class narrowfilelog(filelog):
257 258 """Filelog variation to be used with narrow stores."""
258 259
259 def __init__(self, opener, path, narrowmatch):
260 super(narrowfilelog, self).__init__(opener, path)
260 def __init__(self, opener, path, narrowmatch, try_split=False):
261 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
261 262 self._narrowmatch = narrowmatch
262 263
263 264 def renamed(self, node):
@@ -1240,7 +1240,12 b' class revlogfilestorage:'
1240 1240 if path.startswith(b'/'):
1241 1241 path = path[1:]
1242 1242
1243 return filelog.filelog(self.svfs, path)
1243 try_split = (
1244 self.currenttransaction() is not None
1245 or txnutil.mayhavepending(self.root)
1246 )
1247
1248 return filelog.filelog(self.svfs, path, try_split=try_split)
1244 1249
1245 1250
1246 1251 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
@@ -1251,7 +1256,13 b' class revlognarrowfilestorage:'
1251 1256 if path.startswith(b'/'):
1252 1257 path = path[1:]
1253 1258
1254 return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
1259 try_split = (
1260 self.currenttransaction() is not None
1261 or txnutil.mayhavepending(self.root)
1262 )
1263 return filelog.narrowfilelog(
1264 self.svfs, path, self._storenarrowmatch, try_split=try_split
1265 )
1255 1266
1256 1267
1257 1268 def makefilestorage(requirements, features, **kwargs):
@@ -302,6 +302,7 b' class revlog:'
302 302 persistentnodemap=False,
303 303 concurrencychecker=None,
304 304 trypending=False,
305 try_split=False,
305 306 canonical_parent_order=True,
306 307 ):
307 308 """
@@ -328,6 +329,7 b' class revlog:'
328 329 self._nodemap_file = None
329 330 self.postfix = postfix
330 331 self._trypending = trypending
332 self._try_split = try_split
331 333 self.opener = opener
332 334 if persistentnodemap:
333 335 self._nodemap_file = nodemaputil.get_nodemap_file(self)
@@ -511,6 +513,8 b' class revlog:'
511 513 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
512 514 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
513 515 entry_point = b'%s.i.a' % self.radix
516 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
517 entry_point = b'%s.i.s' % self.radix
514 518 else:
515 519 entry_point = b'%s.i' % self.radix
516 520
@@ -2015,7 +2019,7 b' class revlog:'
2015 2019 raise error.CensoredNodeError(self.display_id, node, text)
2016 2020 raise
2017 2021
2018 def _enforceinlinesize(self, tr):
2022 def _enforceinlinesize(self, tr, side_write=True):
2019 2023 """Check if the revlog is too big for inline and convert if so.
2020 2024
2021 2025 This should be called after revisions are added to the revlog. If the
@@ -2032,7 +2036,8 b' class revlog:'
2032 2036 raise error.RevlogError(
2033 2037 _(b"%s not found in the transaction") % self._indexfile
2034 2038 )
2035 trindex = None
2039 if troffset:
2040 tr.addbackup(self._indexfile, for_offset=True)
2036 2041 tr.add(self._datafile, 0)
2037 2042
2038 2043 existing_handles = False
@@ -2048,6 +2053,29 b' class revlog:'
2048 2053 # No need to deal with sidedata writing handle as it is only
2049 2054 # relevant with revlog-v2 which is never inline, not reaching
2050 2055 # this code
2056 if side_write:
2057 old_index_file_path = self._indexfile
2058 new_index_file_path = self._indexfile + b'.s'
2059 opener = self.opener
2060
2061 fncache = getattr(opener, 'fncache', None)
2062 if fncache is not None:
2063 fncache.addignore(new_index_file_path)
2064
2065 # the "split" index replaces the real index when the transaction is finalized
2066 def finalize_callback(tr):
2067 opener.rename(
2068 new_index_file_path,
2069 old_index_file_path,
2070 checkambig=True,
2071 )
2072
2073 tr.registertmp(new_index_file_path)
2074 if self.target[1] is not None:
2075 finalize_id = b'000-revlog-split-%d-%s' % self.target
2076 else:
2077 finalize_id = b'000-revlog-split-%d' % self.target[0]
2078 tr.addfinalize(finalize_id, finalize_callback)
2051 2079
2052 2080 new_dfh = self._datafp(b'w+')
2053 2081 new_dfh.truncate(0) # drop any potentially existing data
@@ -2055,17 +2083,10 b' class revlog:'
2055 2083 with self._indexfp() as read_ifh:
2056 2084 for r in self:
2057 2085 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2058 if (
2059 trindex is None
2060 and troffset
2061 <= self.start(r) + r * self.index.entry_size
2062 ):
2063 trindex = r
2064 2086 new_dfh.flush()
2065 2087
2066 if trindex is None:
2067 trindex = 0
2068
2088 if side_write:
2089 self._indexfile = new_index_file_path
2069 2090 with self.__index_new_fp() as fp:
2070 2091 self._format_flags &= ~FLAG_INLINE_DATA
2071 2092 self._inline = False
@@ -2079,16 +2100,9 b' class revlog:'
2079 2100 if self._docket is not None:
2080 2101 self._docket.index_end = fp.tell()
2081 2102
2082 # There is a small transactional race here. If the rename of
2083 # the index fails, we should remove the datafile. It is more
2084 # important to ensure that the data file is not truncated
2085 # when the index is replaced as otherwise data is lost.
2086 tr.replace(self._datafile, self.start(trindex))
2087
2088 # the temp file replace the real index when we exit the context
2089 # manager
2090
2091 tr.replace(self._indexfile, trindex * self.index.entry_size)
2103 # If we don't use side-write, the temp file replaces the real
2104 # index when we exit the context manager
2105
2092 2106 nodemaputil.setup_persistent_nodemap(tr, self)
2093 2107 self._segmentfile = randomaccessfile.randomaccessfile(
2094 2108 self.opener,
@@ -9,12 +9,12 b' Helper extension to intercept renames an'
9 9 > from mercurial import extensions, util
10 10 >
11 11 > def extsetup(ui):
12 > def close(orig, *args, **kwargs):
13 > path = util.normpath(args[0]._atomictempfile__name)
14 > if path.endswith(b'/.hg/store/data/file.i'):
12 > def rename(orig, src, dest, *args, **kwargs):
13 > path = util.normpath(dest)
14 > if path.endswith(b'data/file.i'):
15 15 > os.kill(os.getpid(), signal.SIGKILL)
16 > return orig(*args, **kwargs)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
16 > return orig(src, dest, *args, **kwargs)
17 > extensions.wrapfunction(util, 'rename', rename)
18 18 > EOF
19 19
20 20 $ cat > $TESTTMP/intercept_after_rename.py << EOF
@@ -30,6 +30,14 b' Helper extension to intercept renames an'
30 30 > os.kill(os.getpid(), signal.SIGKILL)
31 31 > return r
32 32 > extensions.wrapfunction(util.atomictempfile, 'close', close)
33 > def extsetup(ui):
34 > def rename(orig, src, dest, *args, **kwargs):
35 > path = util.normpath(dest)
36 > r = orig(src, dest, *args, **kwargs)
37 > if path.endswith(b'data/file.i'):
38 > os.kill(os.getpid(), signal.SIGKILL)
39 > return r
40 > extensions.wrapfunction(util, 'rename', rename)
33 41 > EOF
34 42
35 43 $ cat > $TESTTMP/killme.py << EOF
@@ -75,7 +83,7 b' setup a repository for tests'
75 83 $ printf '%20d' '1' > file
76 84 $ hg commit -Aqmc
77 85 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
78 $ hg commit -AqmD
86 $ hg commit -AqmD --traceback
79 87
80 88 Reference size:
81 89 $ f -s file
@@ -127,32 +135,33 b' Reference size:'
127 135 #endif
128 136
129 137
130 The revlog have been split on disk
138 The inline revlog still exists, but a split version exists next to it
131 139
132 140 $ f -s .hg/store/data/file*
133 141 .hg/store/data/file.d: size=132139
134 .hg/store/data/file.i: size=256
142 .hg/store/data/file.i: size=132395
143 .hg/store/data/file.i.s: size=256
135 144
136 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
137 data/file.i 128
138 145
139 146 The first file.i entry should match the "Reference size" above.
140 147 The first file.d entry is the temporary record during the split,
141 148
142 The second entry after the split happened. The sum of the second file.d
143 and the second file.i entry should match the first file.i entry.
149 A "temporary file" entry exists for the split index.
144 150
145 151 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
146 152 data/file.i 1174
147 153 data/file.d 0
148 data/file.d 1046
149 data/file.i 128
154 $ cat .hg/store/journal.backupfiles | tr -s '\000' ' ' | tr -s '\00' ' '| grep data/file
155 data/file.i data/journal.backup.file.i 0
156 data/file.i.s 0
157
158 recover is rolling the split back, the fncache is still valid
159
150 160 $ hg recover
151 161 rolling back interrupted transaction
152 162 (verify step skipped, run `hg verify` to check your repository content)
153 163 $ f -s .hg/store/data/file*
154 .hg/store/data/file.d: size=1046
155 .hg/store/data/file.i: size=128
164 .hg/store/data/file.i: size=1174
156 165 $ hg tip
157 166 changeset: 1:cfa8d6e60429
158 167 tag: tip
@@ -161,12 +170,8 b' and the second file.i entry should match'
161 170 summary: b
162 171
163 172 $ hg verify -q
164 warning: revlog 'data/file.d' not in fncache!
165 1 warnings encountered!
166 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
167 173 $ hg debugrebuildfncache --only-data
168 adding data/file.d
169 1 items added, 0 removed from fncache
174 fncache already up to date
170 175 $ hg verify -q
171 176 $ cd ..
172 177
@@ -189,36 +194,43 b' Reference size:'
189 194 $ cat > .hg/hgrc <<EOF
190 195 > [extensions]
191 196 > intercept_rename = $TESTTMP/intercept_before_rename.py
192 > [hooks]
193 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
194 197 > EOF
195 198 #if chg
196 199 $ hg pull ../troffset-computation
197 200 pulling from ../troffset-computation
201 searching for changes
202 adding changesets
203 adding manifests
204 adding file changes
198 205 [255]
199 206 #else
200 207 $ hg pull ../troffset-computation
201 208 pulling from ../troffset-computation
209 searching for changes
210 adding changesets
211 adding manifests
212 adding file changes
202 213 Killed
203 214 [137]
204 215 #endif
205 216
206 The data file is created, but the revlog is still inline
217 The inline revlog still exists, but a split version exists next to it
207 218
208 219 $ f -s .hg/store/data/file*
209 220 .hg/store/data/file.d: size=132139
210 221 .hg/store/data/file.i: size=132395
222 .hg/store/data/file.i.s: size=256
211 223
212 224 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
213 225 data/file.i 1174
214 226 data/file.d 0
215 data/file.d 1046
227
228 recover is rolling the split back, the fncache is still valid
216 229
217 230 $ hg recover
218 231 rolling back interrupted transaction
219 232 (verify step skipped, run `hg verify` to check your repository content)
220 233 $ f -s .hg/store/data/file*
221 .hg/store/data/file.d: size=1046
222 234 .hg/store/data/file.i: size=1174
223 235 $ hg tip
224 236 changeset: 1:cfa8d6e60429
@@ -235,8 +247,6 b' Test a hard crash right after the index '
235 247
236 248 Now retry the procedure but intercept the rename of the index.
237 249
238 Things get corrupted /o\
239
240 250 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-crash-after-rename
241 251 $ cd troffset-computation-crash-after-rename
242 252
@@ -249,21 +259,27 b' Reference size:'
249 259 $ cat > .hg/hgrc <<EOF
250 260 > [extensions]
251 261 > intercept_rename = $TESTTMP/intercept_after_rename.py
252 > [hooks]
253 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
254 262 > EOF
255 263 #if chg
256 264 $ hg pull ../troffset-computation
257 265 pulling from ../troffset-computation
266 searching for changes
267 adding changesets
268 adding manifests
269 adding file changes
258 270 [255]
259 271 #else
260 272 $ hg pull ../troffset-computation
261 273 pulling from ../troffset-computation
274 searching for changes
275 adding changesets
276 adding manifests
277 adding file changes
262 278 Killed
263 279 [137]
264 280 #endif
265 281
266 the revlog has been split on disk
282 The inline revlog was overwritten on disk
267 283
268 284 $ f -s .hg/store/data/file*
269 285 .hg/store/data/file.d: size=132139
@@ -272,16 +288,14 b' the revlog has been split on disk'
272 288 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
273 289 data/file.i 1174
274 290 data/file.d 0
275 data/file.d 1046
291
292 recover is rolling the split back, the fncache is still valid
276 293
277 294 $ hg recover
278 295 rolling back interrupted transaction
279 abort: attempted to truncate data/file.i to 1174 bytes, but it was already 256 bytes
280
281 [255]
296 (verify step skipped, run `hg verify` to check your repository content)
282 297 $ f -s .hg/store/data/file*
283 .hg/store/data/file.d: size=1046
284 .hg/store/data/file.i: size=256
298 .hg/store/data/file.i: size=1174
285 299 $ hg tip
286 300 changeset: 1:cfa8d6e60429
287 301 tag: tip
@@ -290,23 +304,6 b' the revlog has been split on disk'
290 304 summary: b
291 305
292 306 $ hg verify -q
293 abandoned transaction found - run hg recover
294 warning: revlog 'data/file.d' not in fncache!
295 file@0: data length off by -131093 bytes
296 file@2: unpacking fa1120531cc1: partial read of revlog data/file.d; expected 21 bytes from offset 1046, got 0
297 file@3: unpacking a631378adaa3: partial read of revlog data/file.d; expected 131072 bytes from offset 1067, got -21
298 file@?: rev 2 points to nonexistent changeset 2
299 (expected )
300 file@?: fa1120531cc1 not in manifests
301 file@?: rev 3 points to nonexistent changeset 3
302 (expected )
303 file@?: a631378adaa3 not in manifests
304 not checking dirstate because of previous errors
305 3 warnings encountered!
306 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
307 7 integrity errors encountered!
308 (first damaged changeset appears to be 0)
309 [1]
310 307 $ cd ..
311 308
312 309 Have the transaction rollback itself without any hard crash
@@ -332,11 +329,12 b' Repeat the original test but let hg roll'
332 329 abort: pretxnchangegroup hook exited with status 1
333 330 [40]
334 331
335 File are still split on disk, with the expected size.
332 The split was rolled back
336 333
337 334 $ f -s .hg/store/data/file*
338 .hg/store/data/file.d: size=1046
339 .hg/store/data/file.i: size=128
335 .hg/store/data/file.d: size=0
336 .hg/store/data/file.i: size=1174
337
340 338
341 339 $ hg tip
342 340 changeset: 1:cfa8d6e60429
@@ -346,9 +344,6 b' File are still split on disk, with the e'
346 344 summary: b
347 345
348 346 $ hg verify -q
349 warning: revlog 'data/file.d' not in fncache!
350 1 warnings encountered!
351 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
352 347 $ cd ..
353 348
354 349 Read race
General Comments 0
You need to be logged in to leave comments. Login now