##// END OF EJS Templates
revlog: add a mechanism to verify expected file position before appending...
Kyle Lippincott -
r47349:e9901d01 default
parent child Browse files
Show More
@@ -0,0 +1,38 b''
1 from ..i18n import _
2 from .. import error
3
4
def get_checker(ui, revlog_name=b'changelog'):
    """Get a function that checks file handle position is as expected.

    This is used to ensure that files haven't been modified outside of our
    knowledge (such as on a networked filesystem, if `hg debuglocks` was used,
    or writes to .hg that ignored locks happened).

    Due to revlogs supporting a concept of buffered, delayed, or diverted
    writes, we're allowing the files to be shorter than expected (the data may
    not have been written yet), but they can't be longer.

    Please note that this check is not perfect; it can't detect all cases (there
    may be false-negatives/false-OKs), but it should never claim there's an
    issue when there isn't (false-positives/false-failures).

    The mode is read from the ``debug.revlog.verifyposition.<revlog_name>``
    config value. ``log``, ``warn`` and ``fail`` enable the check; any other
    value (including unset/empty) disables it.

    Returns ``None`` when the check is disabled, otherwise a callable
    ``checker(fh, fn, expected)`` where ``fh`` is the file handle to inspect,
    ``fn`` is the file name used in messages, and ``expected`` is the highest
    acceptable cursor position. When the cursor is beyond ``expected`` the
    checker always logs, additionally warns in ``warn`` mode, and raises
    ``error.RevlogError`` in ``fail`` mode.
    """

    vpos = ui.config(b'debug', b'revlog.verifyposition.' + revlog_name)
    # Avoid any `fh.tell` cost if this isn't enabled. The membership test
    # alone also rejects None and the empty string.
    if vpos not in (b'log', b'warn', b'fail'):
        return None

    def _checker(fh, fn, expected):
        # Query the cursor exactly once so that the value we report is the
        # very value that failed the comparison (and we don't pay for
        # repeated `tell` calls).
        actual = fh.tell()
        if actual <= expected:
            return

        msg = _(b'%s: file cursor at position %d, expected %d')
        # Always log if we're going to warn or fail.
        ui.log(b'debug', msg + b'\n', fn, actual, expected)
        if vpos == b'warn':
            ui.warn((msg + b'\n') % (fn, actual, expected))
        elif vpos == b'fail':
            raise error.RevlogError(msg % (fn, actual, expected))

    return _checker
@@ -0,0 +1,102 b''
1 #testcases skip-detection fail-if-detected
2
3 Test situations that "should" only be reproducible:
4 - on networked filesystems, or
5 - user using `hg debuglocks` to eliminate the lock file, or
6 - something (that doesn't respect the lock file) writing to the .hg directory
7 while we're running
8
9 $ hg init a
10 $ cd a
11
12 $ cat > "$TESTTMP/waitlock_editor.sh" <<EOF
13 > [ -n "\${WAITLOCK_ANNOUNCE:-}" ] && touch "\${WAITLOCK_ANNOUNCE}"
14 > f="\${WAITLOCK_FILE}"
15 > start=\`date +%s\`
16 > timeout=5
17 > while [ \\( ! -f \$f \\) -a \\( ! -L \$f \\) ]; do
18 > now=\`date +%s\`
19 > if [ "\`expr \$now - \$start\`" -gt \$timeout ]; then
20 > echo "timeout: \$f was not created in \$timeout seconds (it is now \$(date +%s))"
21 > exit 1
22 > fi
23 > sleep 0.1
24 > done
25 > if [ \$# -gt 1 ]; then
26 > cat "\$@"
27 > fi
28 > EOF
29 $ chmod +x "$TESTTMP/waitlock_editor.sh"
30
31 Things behave differently if we don't already have a 00changelog.i file when
32 this all starts, so let's make one.
33
34 $ echo r0 > r0
35 $ hg commit -qAm 'r0'
36
37 Start an hg commit that will take a while
38 $ EDITOR_STARTED="$(pwd)/.editor_started"
39 $ MISCHIEF_MANAGED="$(pwd)/.mischief_managed"
40 $ JOBS_FINISHED="$(pwd)/.jobs_finished"
41
42 #if fail-if-detected
43 $ cat >> .hg/hgrc << EOF
44 > [debug]
45 > revlog.verifyposition.changelog = fail
46 > EOF
47 #endif
48
49 $ echo foo > foo
50 $ (WAITLOCK_ANNOUNCE="${EDITOR_STARTED}" \
51 > WAITLOCK_FILE="${MISCHIEF_MANAGED}" \
52 > HGEDITOR="$TESTTMP/waitlock_editor.sh" \
53 > hg commit -qAm 'r1 (foo)' --edit foo > .foo_commit_out 2>&1 ; touch "${JOBS_FINISHED}") &
54
55 Wait for the "editor" to actually start
56 $ WAITLOCK_FILE="${EDITOR_STARTED}" "$TESTTMP/waitlock_editor.sh"
57
58 Break the locks, and make another commit.
59 $ hg debuglocks -LW
60 $ echo bar > bar
61 $ hg commit -qAm 'r2 (bar)' bar
62 $ hg debugrevlogindex -c
63 rev linkrev nodeid p1 p2
64 0 0 222799e2f90b 000000000000 000000000000
65 1 1 6f124f6007a0 222799e2f90b 000000000000
66
67 Awaken the editor from that first commit
68 $ touch "${MISCHIEF_MANAGED}"
69 And wait for it to finish
70 $ WAITLOCK_FILE="${JOBS_FINISHED}" "$TESTTMP/waitlock_editor.sh"
71
72 #if skip-detection
73 (Ensure there was no output)
74 $ cat .foo_commit_out
75 And observe a corrupted repository -- rev 2's linkrev is 1, which should never
76 happen for the changelog (the linkrev should always refer to itself).
77 $ hg debugrevlogindex -c
78 rev linkrev nodeid p1 p2
79 0 0 222799e2f90b 000000000000 000000000000
80 1 1 6f124f6007a0 222799e2f90b 000000000000
81 2 1 ac80e6205bb2 222799e2f90b 000000000000
82 #endif
83
84 #if fail-if-detected
85 $ cat .foo_commit_out
86 transaction abort!
87 rollback completed
88 note: commit message saved in .hg/last-message.txt
89 note: use 'hg commit --logfile .hg/last-message.txt --edit' to reuse it
90 abort: 00changelog.i: file cursor at position 249, expected 121
91 And no corruption in the changelog.
92 $ hg debugrevlogindex -c
93 rev linkrev nodeid p1 p2
94 0 0 222799e2f90b 000000000000 000000000000
95 1 1 6f124f6007a0 222799e2f90b 000000000000
96 And, because of transactions, there's none in the manifestlog either.
97 $ hg debugrevlogindex -m
98 rev linkrev nodeid p1 p2
99 0 0 7b7020262a56 000000000000 000000000000
100 1 1 ad3fe36d86d9 7b7020262a56 000000000000
101 #endif
102
@@ -90,7 +90,7 b' class gitstore(object): # store.basicst'
90 return os.path.join(self.path, b'..', b'.hg', f)
90 return os.path.join(self.path, b'..', b'.hg', f)
91 raise NotImplementedError(b'Need to pick file for %s.' % f)
91 raise NotImplementedError(b'Need to pick file for %s.' % f)
92
92
93 def changelog(self, trypending):
93 def changelog(self, trypending, concurrencychecker):
94 # TODO we don't have a plan for trypending in hg's git support yet
94 # TODO we don't have a plan for trypending in hg's git support yet
95 return gitlog.changelog(self.git, self._db)
95 return gitlog.changelog(self.git, self._db)
96
96
@@ -380,7 +380,7 b' class changelogrevision(object):'
380
380
381
381
382 class changelog(revlog.revlog):
382 class changelog(revlog.revlog):
383 def __init__(self, opener, trypending=False):
383 def __init__(self, opener, trypending=False, concurrencychecker=None):
384 """Load a changelog revlog using an opener.
384 """Load a changelog revlog using an opener.
385
385
386 If ``trypending`` is true, we attempt to load the index from a
386 If ``trypending`` is true, we attempt to load the index from a
@@ -389,6 +389,9 b' class changelog(revlog.revlog):'
389 revision) data for a transaction that hasn't been finalized yet.
389 revision) data for a transaction that hasn't been finalized yet.
390 It exists in a separate file to facilitate readers (such as
390 It exists in a separate file to facilitate readers (such as
391 hooks processes) accessing data before a transaction is finalized.
391 hooks processes) accessing data before a transaction is finalized.
392
393 ``concurrencychecker`` will be passed to the revlog init function, see
394 the documentation there.
392 """
395 """
393 if trypending and opener.exists(b'00changelog.i.a'):
396 if trypending and opener.exists(b'00changelog.i.a'):
394 indexfile = b'00changelog.i.a'
397 indexfile = b'00changelog.i.a'
@@ -404,6 +407,7 b' class changelog(revlog.revlog):'
404 checkambig=True,
407 checkambig=True,
405 mmaplargeindex=True,
408 mmaplargeindex=True,
406 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
409 persistentnodemap=opener.options.get(b'persistent-nodemap', False),
410 concurrencychecker=concurrencychecker,
407 )
411 )
408
412
409 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
413 if self._initempty and (self.version & 0xFFFF == revlog.REVLOGV1):
@@ -580,6 +580,11 b' coreconfigitem('
580 default=0,
580 default=0,
581 )
581 )
582 coreconfigitem(
582 coreconfigitem(
583 b'debug',
584 b'revlog.verifyposition.changelog',
585 default=b'',
586 )
587 coreconfigitem(
583 b'defaults',
588 b'defaults',
584 b'.*',
589 b'.*',
585 default=None,
590 default=None,
@@ -84,7 +84,10 b' from .utils import ('
84 stringutil,
84 stringutil,
85 )
85 )
86
86
87 from .revlogutils import constants as revlogconst
87 from .revlogutils import (
88 concurrency_checker as revlogchecker,
89 constants as revlogconst,
90 )
88
91
89 release = lockmod.release
92 release = lockmod.release
90 urlerr = util.urlerr
93 urlerr = util.urlerr
@@ -1639,7 +1642,10 b' class localrepository(object):'
1639 def changelog(self):
1642 def changelog(self):
1640 # load dirstate before changelog to avoid race see issue6303
1643 # load dirstate before changelog to avoid race see issue6303
1641 self.dirstate.prefetch_parents()
1644 self.dirstate.prefetch_parents()
1642 return self.store.changelog(txnutil.mayhavepending(self.root))
1645 return self.store.changelog(
1646 txnutil.mayhavepending(self.root),
1647 concurrencychecker=revlogchecker.get_checker(self.ui, b'changelog'),
1648 )
1643
1649
1644 @storecache(b'00manifest.i')
1650 @storecache(b'00manifest.i')
1645 def manifestlog(self):
1651 def manifestlog(self):
@@ -421,6 +421,11 b' class revlog(object):'
421
421
422 If `upperboundcomp` is not None, this is the expected maximal gain from
422 If `upperboundcomp` is not None, this is the expected maximal gain from
423 compression for the data content.
423 compression for the data content.
424
425 `concurrencychecker` is an optional function that receives 3 arguments: a
426 file handle, a filename, and an expected position. It should check whether
427 the current position in the file handle is valid, and log/warn/fail (by
428 raising).
424 """
429 """
425
430
426 _flagserrorclass = error.RevlogError
431 _flagserrorclass = error.RevlogError
@@ -435,6 +440,7 b' class revlog(object):'
435 censorable=False,
440 censorable=False,
436 upperboundcomp=None,
441 upperboundcomp=None,
437 persistentnodemap=False,
442 persistentnodemap=False,
443 concurrencychecker=None,
438 ):
444 ):
439 """
445 """
440 create a revlog object
446 create a revlog object
@@ -490,6 +496,8 b' class revlog(object):'
490
496
491 self._loadindex()
497 self._loadindex()
492
498
499 self._concurrencychecker = concurrencychecker
500
493 def _loadindex(self):
501 def _loadindex(self):
494 mmapindexthreshold = None
502 mmapindexthreshold = None
495 opts = self.opener.options
503 opts = self.opener.options
@@ -2284,6 +2292,21 b' class revlog(object):'
2284 curr = len(self)
2292 curr = len(self)
2285 prev = curr - 1
2293 prev = curr - 1
2286 offset = self.end(prev)
2294 offset = self.end(prev)
2295
2296 if self._concurrencychecker:
2297 if self._inline:
2298 # offset is "as if" it were in the .d file, so we need to add on
2299 # the size of the entry metadata.
2300 self._concurrencychecker(
2301 ifh, self.indexfile, offset + curr * self._io.size
2302 )
2303 else:
2304 # Entries in the .i are a consistent size.
2305 self._concurrencychecker(
2306 ifh, self.indexfile, curr * self._io.size
2307 )
2308 self._concurrencychecker(dfh, self.datafile, offset)
2309
2287 p1r, p2r = self.rev(p1), self.rev(p2)
2310 p1r, p2r = self.rev(p1), self.rev(p2)
2288
2311
2289 # full versions are inserted when the needed deltas
2312 # full versions are inserted when the needed deltas
@@ -433,8 +433,12 b' class basicstore(object):'
433 l.sort()
433 l.sort()
434 return l
434 return l
435
435
436 def changelog(self, trypending):
436 def changelog(self, trypending, concurrencychecker=None):
437 return changelog.changelog(self.vfs, trypending=trypending)
437 return changelog.changelog(
438 self.vfs,
439 trypending=trypending,
440 concurrencychecker=concurrencychecker,
441 )
438
442
439 def manifestlog(self, repo, storenarrowmatch):
443 def manifestlog(self, repo, storenarrowmatch):
440 rootstore = manifest.manifestrevlog(self.vfs)
444 rootstore = manifest.manifestrevlog(self.vfs)
General Comments 0
You need to be logged in to leave comments. Login now