##// END OF EJS Templates
revlog: improve the robustness of the splitting process...
marmoute -
r51242:87f0155d stable
parent child Browse files
Show More
@@ -481,7 +481,7 b' class changelog(revlog.revlog):'
481 self._delaybuf = None
481 self._delaybuf = None
482 self._divert = False
482 self._divert = False
483 # split when we're done
483 # split when we're done
484 self._enforceinlinesize(tr)
484 self._enforceinlinesize(tr, side_write=False)
485
485
486 def _writepending(self, tr):
486 def _writepending(self, tr):
487 """create a file containing the unfinalized state for
487 """create a file containing the unfinalized state for
@@ -512,9 +512,9 b' class changelog(revlog.revlog):'
512
512
513 return False
513 return False
514
514
515 def _enforceinlinesize(self, tr):
515 def _enforceinlinesize(self, tr, side_write=True):
516 if not self._delayed:
516 if not self._delayed:
517 revlog.revlog._enforceinlinesize(self, tr)
517 revlog.revlog._enforceinlinesize(self, tr, side_write=side_write)
518
518
519 def read(self, nodeorrev):
519 def read(self, nodeorrev):
520 """Obtain data from a parsed changelog revision.
520 """Obtain data from a parsed changelog revision.
@@ -25,7 +25,7 b' from .revlogutils import ('
25
25
26 @interfaceutil.implementer(repository.ifilestorage)
26 @interfaceutil.implementer(repository.ifilestorage)
27 class filelog:
27 class filelog:
28 def __init__(self, opener, path):
28 def __init__(self, opener, path, try_split=False):
29 self._revlog = revlog.revlog(
29 self._revlog = revlog.revlog(
30 opener,
30 opener,
31 # XXX should use the unencoded path
31 # XXX should use the unencoded path
@@ -33,6 +33,7 b' class filelog:'
33 radix=b'/'.join((b'data', path)),
33 radix=b'/'.join((b'data', path)),
34 censorable=True,
34 censorable=True,
35 canonical_parent_order=False, # see comment in revlog.py
35 canonical_parent_order=False, # see comment in revlog.py
36 try_split=try_split,
36 )
37 )
37 # Full name of the user visible file, relative to the repository root.
38 # Full name of the user visible file, relative to the repository root.
38 # Used by LFS.
39 # Used by LFS.
@@ -256,8 +257,8 b' class filelog:'
256 class narrowfilelog(filelog):
257 class narrowfilelog(filelog):
257 """Filelog variation to be used with narrow stores."""
258 """Filelog variation to be used with narrow stores."""
258
259
259 def __init__(self, opener, path, narrowmatch):
260 def __init__(self, opener, path, narrowmatch, try_split=False):
260 super(narrowfilelog, self).__init__(opener, path)
261 super(narrowfilelog, self).__init__(opener, path, try_split=try_split)
261 self._narrowmatch = narrowmatch
262 self._narrowmatch = narrowmatch
262
263
263 def renamed(self, node):
264 def renamed(self, node):
@@ -1240,7 +1240,12 b' class revlogfilestorage:'
1240 if path.startswith(b'/'):
1240 if path.startswith(b'/'):
1241 path = path[1:]
1241 path = path[1:]
1242
1242
1243 return filelog.filelog(self.svfs, path)
1243 try_split = (
1244 self.currenttransaction() is not None
1245 or txnutil.mayhavepending(self.root)
1246 )
1247
1248 return filelog.filelog(self.svfs, path, try_split=try_split)
1244
1249
1245
1250
1246 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
1251 @interfaceutil.implementer(repository.ilocalrepositoryfilestorage)
@@ -1251,7 +1256,13 b' class revlognarrowfilestorage:'
1251 if path.startswith(b'/'):
1256 if path.startswith(b'/'):
1252 path = path[1:]
1257 path = path[1:]
1253
1258
1254 return filelog.narrowfilelog(self.svfs, path, self._storenarrowmatch)
1259 try_split = (
1260 self.currenttransaction() is not None
1261 or txnutil.mayhavepending(self.root)
1262 )
1263 return filelog.narrowfilelog(
1264 self.svfs, path, self._storenarrowmatch, try_split=try_split
1265 )
1255
1266
1256
1267
1257 def makefilestorage(requirements, features, **kwargs):
1268 def makefilestorage(requirements, features, **kwargs):
@@ -302,6 +302,7 b' class revlog:'
302 persistentnodemap=False,
302 persistentnodemap=False,
303 concurrencychecker=None,
303 concurrencychecker=None,
304 trypending=False,
304 trypending=False,
305 try_split=False,
305 canonical_parent_order=True,
306 canonical_parent_order=True,
306 ):
307 ):
307 """
308 """
@@ -328,6 +329,7 b' class revlog:'
328 self._nodemap_file = None
329 self._nodemap_file = None
329 self.postfix = postfix
330 self.postfix = postfix
330 self._trypending = trypending
331 self._trypending = trypending
332 self._try_split = try_split
331 self.opener = opener
333 self.opener = opener
332 if persistentnodemap:
334 if persistentnodemap:
333 self._nodemap_file = nodemaputil.get_nodemap_file(self)
335 self._nodemap_file = nodemaputil.get_nodemap_file(self)
@@ -511,6 +513,8 b' class revlog:'
511 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
513 entry_point = b'%s.i.%s' % (self.radix, self.postfix)
512 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
514 elif self._trypending and self.opener.exists(b'%s.i.a' % self.radix):
513 entry_point = b'%s.i.a' % self.radix
515 entry_point = b'%s.i.a' % self.radix
516 elif self._try_split and self.opener.exists(b'%s.i.s' % self.radix):
517 entry_point = b'%s.i.s' % self.radix
514 else:
518 else:
515 entry_point = b'%s.i' % self.radix
519 entry_point = b'%s.i' % self.radix
516
520
@@ -2015,7 +2019,7 b' class revlog:'
2015 raise error.CensoredNodeError(self.display_id, node, text)
2019 raise error.CensoredNodeError(self.display_id, node, text)
2016 raise
2020 raise
2017
2021
2018 def _enforceinlinesize(self, tr):
2022 def _enforceinlinesize(self, tr, side_write=True):
2019 """Check if the revlog is too big for inline and convert if so.
2023 """Check if the revlog is too big for inline and convert if so.
2020
2024
2021 This should be called after revisions are added to the revlog. If the
2025 This should be called after revisions are added to the revlog. If the
@@ -2032,7 +2036,8 b' class revlog:'
2032 raise error.RevlogError(
2036 raise error.RevlogError(
2033 _(b"%s not found in the transaction") % self._indexfile
2037 _(b"%s not found in the transaction") % self._indexfile
2034 )
2038 )
2035 trindex = None
2039 if troffset:
2040 tr.addbackup(self._indexfile, for_offset=True)
2036 tr.add(self._datafile, 0)
2041 tr.add(self._datafile, 0)
2037
2042
2038 existing_handles = False
2043 existing_handles = False
@@ -2048,6 +2053,29 b' class revlog:'
2048 # No need to deal with sidedata writing handle as it is only
2053 # No need to deal with sidedata writing handle as it is only
2049 # relevant with revlog-v2 which is never inline, not reaching
2054 # relevant with revlog-v2 which is never inline, not reaching
2050 # this code
2055 # this code
2056 if side_write:
2057 old_index_file_path = self._indexfile
2058 new_index_file_path = self._indexfile + b'.s'
2059 opener = self.opener
2060
2061 fncache = getattr(opener, 'fncache', None)
2062 if fncache is not None:
2063 fncache.addignore(new_index_file_path)
2064
2065 # the "split" index replaces the real index when the transaction is finalized
2066 def finalize_callback(tr):
2067 opener.rename(
2068 new_index_file_path,
2069 old_index_file_path,
2070 checkambig=True,
2071 )
2072
2073 tr.registertmp(new_index_file_path)
2074 if self.target[1] is not None:
2075 finalize_id = b'000-revlog-split-%d-%s' % self.target
2076 else:
2077 finalize_id = b'000-revlog-split-%d' % self.target[0]
2078 tr.addfinalize(finalize_id, finalize_callback)
2051
2079
2052 new_dfh = self._datafp(b'w+')
2080 new_dfh = self._datafp(b'w+')
2053 new_dfh.truncate(0) # drop any potentially existing data
2081 new_dfh.truncate(0) # drop any potentially existing data
@@ -2055,17 +2083,10 b' class revlog:'
2055 with self._indexfp() as read_ifh:
2083 with self._indexfp() as read_ifh:
2056 for r in self:
2084 for r in self:
2057 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2085 new_dfh.write(self._getsegmentforrevs(r, r, df=read_ifh)[1])
2058 if (
2059 trindex is None
2060 and troffset
2061 <= self.start(r) + r * self.index.entry_size
2062 ):
2063 trindex = r
2064 new_dfh.flush()
2086 new_dfh.flush()
2065
2087
2066 if trindex is None:
2088 if side_write:
2067 trindex = 0
2089 self._indexfile = new_index_file_path
2068
2069 with self.__index_new_fp() as fp:
2090 with self.__index_new_fp() as fp:
2070 self._format_flags &= ~FLAG_INLINE_DATA
2091 self._format_flags &= ~FLAG_INLINE_DATA
2071 self._inline = False
2092 self._inline = False
@@ -2079,16 +2100,9 b' class revlog:'
2079 if self._docket is not None:
2100 if self._docket is not None:
2080 self._docket.index_end = fp.tell()
2101 self._docket.index_end = fp.tell()
2081
2102
2082 # There is a small transactional race here. If the rename of
2103 # If we don't use side-write, the temp file replaces the real
2083 # the index fails, we should remove the datafile. It is more
2104 # index when we exit the context manager
2084 # important to ensure that the data file is not truncated
2105
2085 # when the index is replaced as otherwise data is lost.
2086 tr.replace(self._datafile, self.start(trindex))
2087
2088 # the temp file replace the real index when we exit the context
2089 # manager
2090
2091 tr.replace(self._indexfile, trindex * self.index.entry_size)
2092 nodemaputil.setup_persistent_nodemap(tr, self)
2106 nodemaputil.setup_persistent_nodemap(tr, self)
2093 self._segmentfile = randomaccessfile.randomaccessfile(
2107 self._segmentfile = randomaccessfile.randomaccessfile(
2094 self.opener,
2108 self.opener,
@@ -9,12 +9,12 b' Helper extension to intercept renames an'
9 > from mercurial import extensions, util
9 > from mercurial import extensions, util
10 >
10 >
11 > def extsetup(ui):
11 > def extsetup(ui):
12 > def close(orig, *args, **kwargs):
12 > def rename(orig, src, dest, *args, **kwargs):
13 > path = util.normpath(args[0]._atomictempfile__name)
13 > path = util.normpath(dest)
14 > if path.endswith(b'/.hg/store/data/file.i'):
14 > if path.endswith(b'data/file.i'):
15 > os.kill(os.getpid(), signal.SIGKILL)
15 > os.kill(os.getpid(), signal.SIGKILL)
16 > return orig(*args, **kwargs)
16 > return orig(src, dest, *args, **kwargs)
17 > extensions.wrapfunction(util.atomictempfile, 'close', close)
17 > extensions.wrapfunction(util, 'rename', rename)
18 > EOF
18 > EOF
19
19
20 $ cat > $TESTTMP/intercept_after_rename.py << EOF
20 $ cat > $TESTTMP/intercept_after_rename.py << EOF
@@ -30,6 +30,14 b' Helper extension to intercept renames an'
30 > os.kill(os.getpid(), signal.SIGKILL)
30 > os.kill(os.getpid(), signal.SIGKILL)
31 > return r
31 > return r
32 > extensions.wrapfunction(util.atomictempfile, 'close', close)
32 > extensions.wrapfunction(util.atomictempfile, 'close', close)
33 > def extsetup(ui):
34 > def rename(orig, src, dest, *args, **kwargs):
35 > path = util.normpath(dest)
36 > r = orig(src, dest, *args, **kwargs)
37 > if path.endswith(b'data/file.i'):
38 > os.kill(os.getpid(), signal.SIGKILL)
39 > return r
40 > extensions.wrapfunction(util, 'rename', rename)
33 > EOF
41 > EOF
34
42
35 $ cat > $TESTTMP/killme.py << EOF
43 $ cat > $TESTTMP/killme.py << EOF
@@ -75,7 +83,7 b' setup a repository for tests'
75 $ printf '%20d' '1' > file
83 $ printf '%20d' '1' > file
76 $ hg commit -Aqmc
84 $ hg commit -Aqmc
77 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
85 $ dd if=/dev/zero of=file bs=1k count=128 > /dev/null 2>&1
78 $ hg commit -AqmD
86 $ hg commit -AqmD --traceback
79
87
80 Reference size:
88 Reference size:
81 $ f -s file
89 $ f -s file
@@ -127,32 +135,33 b' Reference size:'
127 #endif
135 #endif
128
136
129
137
130 The revlog have been split on disk
138 The inline revlog still exists, but a split version exists next to it
131
139
132 $ f -s .hg/store/data/file*
140 $ f -s .hg/store/data/file*
133 .hg/store/data/file.d: size=132139
141 .hg/store/data/file.d: size=132139
134 .hg/store/data/file.i: size=256
142 .hg/store/data/file.i: size=132395
143 .hg/store/data/file.i.s: size=256
135
144
136 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file | tail -1
137 data/file.i 128
138
145
139 The first file.i entry should match the "Reference size" above.
146 The first file.i entry should match the "Reference size" above.
140 The first file.d entry is the temporary record during the split,
147 The first file.d entry is the temporary record during the split,
141
148
142 The second entry after the split happened. The sum of the second file.d
149 A "temporary file" entry exists for the split index.
143 and the second file.i entry should match the first file.i entry.
144
150
145 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
151 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
146 data/file.i 1174
152 data/file.i 1174
147 data/file.d 0
153 data/file.d 0
148 data/file.d 1046
154 $ cat .hg/store/journal.backupfiles | tr -s '\000' ' ' | tr -s '\00' ' '| grep data/file
149 data/file.i 128
155 data/file.i data/journal.backup.file.i 0
156 data/file.i.s 0
157
158 recover is rolling the split back, the fncache is still valid
159
150 $ hg recover
160 $ hg recover
151 rolling back interrupted transaction
161 rolling back interrupted transaction
152 (verify step skipped, run `hg verify` to check your repository content)
162 (verify step skipped, run `hg verify` to check your repository content)
153 $ f -s .hg/store/data/file*
163 $ f -s .hg/store/data/file*
154 .hg/store/data/file.d: size=1046
164 .hg/store/data/file.i: size=1174
155 .hg/store/data/file.i: size=128
156 $ hg tip
165 $ hg tip
157 changeset: 1:cfa8d6e60429
166 changeset: 1:cfa8d6e60429
158 tag: tip
167 tag: tip
@@ -161,12 +170,8 b' and the second file.i entry should match'
161 summary: b
170 summary: b
162
171
163 $ hg verify -q
172 $ hg verify -q
164 warning: revlog 'data/file.d' not in fncache!
165 1 warnings encountered!
166 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
167 $ hg debugrebuildfncache --only-data
173 $ hg debugrebuildfncache --only-data
168 adding data/file.d
174 fncache already up to date
169 1 items added, 0 removed from fncache
170 $ hg verify -q
175 $ hg verify -q
171 $ cd ..
176 $ cd ..
172
177
@@ -189,36 +194,43 b' Reference size:'
189 $ cat > .hg/hgrc <<EOF
194 $ cat > .hg/hgrc <<EOF
190 > [extensions]
195 > [extensions]
191 > intercept_rename = $TESTTMP/intercept_before_rename.py
196 > intercept_rename = $TESTTMP/intercept_before_rename.py
192 > [hooks]
193 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
194 > EOF
197 > EOF
195 #if chg
198 #if chg
196 $ hg pull ../troffset-computation
199 $ hg pull ../troffset-computation
197 pulling from ../troffset-computation
200 pulling from ../troffset-computation
201 searching for changes
202 adding changesets
203 adding manifests
204 adding file changes
198 [255]
205 [255]
199 #else
206 #else
200 $ hg pull ../troffset-computation
207 $ hg pull ../troffset-computation
201 pulling from ../troffset-computation
208 pulling from ../troffset-computation
209 searching for changes
210 adding changesets
211 adding manifests
212 adding file changes
202 Killed
213 Killed
203 [137]
214 [137]
204 #endif
215 #endif
205
216
206 The data file is created, but the revlog is still inline
217 The inline revlog still exists, but a split version exists next to it
207
218
208 $ f -s .hg/store/data/file*
219 $ f -s .hg/store/data/file*
209 .hg/store/data/file.d: size=132139
220 .hg/store/data/file.d: size=132139
210 .hg/store/data/file.i: size=132395
221 .hg/store/data/file.i: size=132395
222 .hg/store/data/file.i.s: size=256
211
223
212 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
224 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
213 data/file.i 1174
225 data/file.i 1174
214 data/file.d 0
226 data/file.d 0
215 data/file.d 1046
227
228 recover is rolling the split back, the fncache is still valid
216
229
217 $ hg recover
230 $ hg recover
218 rolling back interrupted transaction
231 rolling back interrupted transaction
219 (verify step skipped, run `hg verify` to check your repository content)
232 (verify step skipped, run `hg verify` to check your repository content)
220 $ f -s .hg/store/data/file*
233 $ f -s .hg/store/data/file*
221 .hg/store/data/file.d: size=1046
222 .hg/store/data/file.i: size=1174
234 .hg/store/data/file.i: size=1174
223 $ hg tip
235 $ hg tip
224 changeset: 1:cfa8d6e60429
236 changeset: 1:cfa8d6e60429
@@ -235,8 +247,6 b' Test a hard crash right after the index '
235
247
236 Now retry the procedure but intercept the rename of the index.
248 Now retry the procedure but intercept the rename of the index.
237
249
238 Things get corrupted /o\
239
240 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-crash-after-rename
250 $ hg clone --quiet --rev 1 troffset-computation troffset-computation-crash-after-rename
241 $ cd troffset-computation-crash-after-rename
251 $ cd troffset-computation-crash-after-rename
242
252
@@ -249,21 +259,27 b' Reference size:'
249 $ cat > .hg/hgrc <<EOF
259 $ cat > .hg/hgrc <<EOF
250 > [extensions]
260 > [extensions]
251 > intercept_rename = $TESTTMP/intercept_after_rename.py
261 > intercept_rename = $TESTTMP/intercept_after_rename.py
252 > [hooks]
253 > pretxnchangegroup = python:$TESTTMP/killme.py:killme
254 > EOF
262 > EOF
255 #if chg
263 #if chg
256 $ hg pull ../troffset-computation
264 $ hg pull ../troffset-computation
257 pulling from ../troffset-computation
265 pulling from ../troffset-computation
266 searching for changes
267 adding changesets
268 adding manifests
269 adding file changes
258 [255]
270 [255]
259 #else
271 #else
260 $ hg pull ../troffset-computation
272 $ hg pull ../troffset-computation
261 pulling from ../troffset-computation
273 pulling from ../troffset-computation
274 searching for changes
275 adding changesets
276 adding manifests
277 adding file changes
262 Killed
278 Killed
263 [137]
279 [137]
264 #endif
280 #endif
265
281
266 the revlog has been split on disk
282 The inline revlog was over written on disk
267
283
268 $ f -s .hg/store/data/file*
284 $ f -s .hg/store/data/file*
269 .hg/store/data/file.d: size=132139
285 .hg/store/data/file.d: size=132139
@@ -272,16 +288,14 b' the revlog has been split on disk'
272 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
288 $ cat .hg/store/journal | tr -s '\000' ' ' | grep data/file
273 data/file.i 1174
289 data/file.i 1174
274 data/file.d 0
290 data/file.d 0
275 data/file.d 1046
291
292 recover is rolling the split back, the fncache is still valid
276
293
277 $ hg recover
294 $ hg recover
278 rolling back interrupted transaction
295 rolling back interrupted transaction
279 abort: attempted to truncate data/file.i to 1174 bytes, but it was already 256 bytes
296 (verify step skipped, run `hg verify` to check your repository content)
280
281 [255]
282 $ f -s .hg/store/data/file*
297 $ f -s .hg/store/data/file*
283 .hg/store/data/file.d: size=1046
298 .hg/store/data/file.i: size=1174
284 .hg/store/data/file.i: size=256
285 $ hg tip
299 $ hg tip
286 changeset: 1:cfa8d6e60429
300 changeset: 1:cfa8d6e60429
287 tag: tip
301 tag: tip
@@ -290,23 +304,6 b' the revlog has been split on disk'
290 summary: b
304 summary: b
291
305
292 $ hg verify -q
306 $ hg verify -q
293 abandoned transaction found - run hg recover
294 warning: revlog 'data/file.d' not in fncache!
295 file@0: data length off by -131093 bytes
296 file@2: unpacking fa1120531cc1: partial read of revlog data/file.d; expected 21 bytes from offset 1046, got 0
297 file@3: unpacking a631378adaa3: partial read of revlog data/file.d; expected 131072 bytes from offset 1067, got -21
298 file@?: rev 2 points to nonexistent changeset 2
299 (expected )
300 file@?: fa1120531cc1 not in manifests
301 file@?: rev 3 points to nonexistent changeset 3
302 (expected )
303 file@?: a631378adaa3 not in manifests
304 not checking dirstate because of previous errors
305 3 warnings encountered!
306 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
307 7 integrity errors encountered!
308 (first damaged changeset appears to be 0)
309 [1]
310 $ cd ..
307 $ cd ..
311
308
312 Have the transaction rollback itself without any hard crash
309 Have the transaction rollback itself without any hard crash
@@ -332,11 +329,12 b' Repeat the original test but let hg roll'
332 abort: pretxnchangegroup hook exited with status 1
329 abort: pretxnchangegroup hook exited with status 1
333 [40]
330 [40]
334
331
335 File are still split on disk, with the expected size.
332 The split was rolled back
336
333
337 $ f -s .hg/store/data/file*
334 $ f -s .hg/store/data/file*
338 .hg/store/data/file.d: size=1046
335 .hg/store/data/file.d: size=0
339 .hg/store/data/file.i: size=128
336 .hg/store/data/file.i: size=1174
337
340
338
341 $ hg tip
339 $ hg tip
342 changeset: 1:cfa8d6e60429
340 changeset: 1:cfa8d6e60429
@@ -346,9 +344,6 b' File are still split on disk, with the e'
346 summary: b
344 summary: b
347
345
348 $ hg verify -q
346 $ hg verify -q
349 warning: revlog 'data/file.d' not in fncache!
350 1 warnings encountered!
351 hint: run "hg debugrebuildfncache" to recover from corrupt fncache
352 $ cd ..
347 $ cd ..
353
348
354 Read race
349 Read race
General Comments 0
You need to be logged in to leave comments. Login now