##// END OF EJS Templates
verify: keep the revlog open for reading while verifying it...
marmoute -
r51908:812cd3df default
parent child Browse files
Show More
@@ -1,628 +1,629
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10
10
11 from .i18n import _
11 from .i18n import _
12 from .node import short
12 from .node import short
13 from .utils import stringutil
13 from .utils import stringutil
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements,
18 requirements,
19 revlog,
19 revlog,
20 transaction,
20 transaction,
21 util,
21 util,
22 )
22 )
23
23
# Verification levels understood by `verify()` and `verifier`.
# VERIFY_FULL additionally reads every full manifest text during the
# manifest check (see the `self._level >= VERIFY_FULL` test there).
VERIFY_DEFAULT = 0
VERIFY_FULL = 1
26
26
27
27
def verify(repo, level=None):
    """Verify the integrity of ``repo`` while holding its lock.

    ``level`` is forwarded unchanged to ``verifier``; ``None`` lets the
    verifier pick its default level.

    Returns whatever ``verifier.verify()`` returns (1 when errors were
    found, 0 otherwise).
    """
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()
32
32
33
33
34 def _normpath(f):
34 def _normpath(f):
35 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # under hg < 2.4, convert didn't sanitize paths properly, so a
36 # converted repo may contain repeated slashes
36 # converted repo may contain repeated slashes
37 while b'//' in f:
37 while b'//' in f:
38 f = f.replace(b'//', b'/')
38 f = f.replace(b'//', b'/')
39 return f
39 return f
40
40
41
41
# User-facing message templates shared by the verification steps.

# Printed in the final report when a revlog was found missing from the
# fncache during the file check.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error text used when a parent directory manifest references a
# sub-directory manifest node that the sub-directory manifest log lacks.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)

# Templates for copy-source related warnings.
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)

WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
57
57
58
58
59 class verifier:
59 class verifier:
def __init__(self, repo, level=None):
    """Prepare the state needed to verify ``repo``.

    - repo:  the repository to check; its unfiltered view is what gets
             verified
    - level: VERIFY_DEFAULT or VERIFY_FULL; ``None`` means VERIFY_DEFAULT
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    if level is None:
        level = VERIFY_DEFAULT
    self._level = level
    # changelog revisions associated with at least one reported error
    self.badrevs = set()
    # running counters reported at the end of `verify()`
    self.errors = 0
    self.warnings = 0
    self.havecl = len(repo.changelog) > 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
    # True when the changelog uses anything other than revlog format 0
    self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
    # set once a changeset referring to a non-null manifest has been seen
    self.refersmf = False
    # set once a "revlog not in fncache" warning has been emitted
    self.fncachewarned = False
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
79
79
80 def _warn(self, msg):
80 def _warn(self, msg):
81 """record a "warning" level issue"""
81 """record a "warning" level issue"""
82 self.ui.warn(msg + b"\n")
82 self.ui.warn(msg + b"\n")
83 self.warnings += 1
83 self.warnings += 1
84
84
85 def _err(self, linkrev, msg, filename=None):
85 def _err(self, linkrev, msg, filename=None):
86 """record a "error" level issue"""
86 """record a "error" level issue"""
87 if linkrev is not None:
87 if linkrev is not None:
88 self.badrevs.add(linkrev)
88 self.badrevs.add(linkrev)
89 linkrev = b"%d" % linkrev
89 linkrev = b"%d" % linkrev
90 else:
90 else:
91 linkrev = b'?'
91 linkrev = b'?'
92 msg = b"%s: %s" % (linkrev, msg)
92 msg = b"%s: %s" % (linkrev, msg)
93 if filename:
93 if filename:
94 msg = b"%s@%s" % (filename, msg)
94 msg = b"%s@%s" % (filename, msg)
95 self.ui.warn(b" " + msg + b"\n")
95 self.ui.warn(b" " + msg + b"\n")
96 self.errors += 1
96 self.errors += 1
97
97
def _exc(self, linkrev, msg, inst, filename=None):
    """record exception raised during the verify process

    The exception ``inst`` is rendered to bytes and appended to ``msg``
    before being reported through ``_err``.
    """
    fmsg = stringutil.forcebytestr(inst)
    if not fmsg:
        # fall back to the repr when the exception has no message
        fmsg = pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
104
104
def _checkrevlog(self, obj, name, linkrev):
    """verify high level property of a revlog

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.

    ``obj`` is the revlog, ``name`` the bytes label used in messages and
    ``linkrev`` the changelog revision an "empty or missing" error is
    attributed to.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    # checksize() yields a pair: (data length discrepancy, extra index bytes)
    d = obj.checksize()
    if d[0]:
        self._err(None, _(b"data length off by %d bytes") % d[0], name)
    if d[1]:
        self._err(None, _(b"index contains %d extra bytes") % d[1], name)

    # warn when this revlog's format family differs from the changelog's
    if obj._format_version != revlog.REVLOGV0:
        if not self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 1") % name)
    elif self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
128
128
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - linkrev points to the lowest of these changesets,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    Return the linkrev of the revision (or None for changelog's revisions).
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs; build the
                    # filtered list from the *original* candidates (the
                    # list must not be emptied before it is iterated), and
                    # only rebind it once every lookup has succeeded
                    linkrevs = [
                        candidate
                        for candidate in linkrevs
                        if self.lrugetctx(candidate)[f].filenode() == node
                    ]
                except Exception:
                    # best effort: keep the unfiltered candidates
                    pass
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
186
186
def verify(self):
    """verify the content of the Mercurial repository

    This method runs all verifications, displaying issues as they are
    found.

    Raises error.Abort when the repository URL does not start with
    ``file:`` (bundle or remote repositories cannot be verified).

    Returns 1 if any error has been encountered, 0 otherwise.
    """
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    if transaction.has_abandoned_transaction(repo):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        ui.status(
            _(b"repository uses revlog format %d\n")
            % (1 if self.revlogv1 else 0)
        )

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    if self.errors:
        ui.warn(_(b"not checking dirstate because of previous errors\n"))
        dirstate_errors = 0
    else:
        dirstate_errors = self._verify_dirstate()

    # final report
    ui.status(
        _(b"checked %d changesets with %d changes to %d files\n")
        % (len(repo.changelog), filerevisions, totalfiles)
    )
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if self.errors:
        ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
        if self.badrevs:
            msg = _(b"(first damaged changeset appears to be %d)\n")
            msg %= min(self.badrevs)
            ui.warn(msg)
        # NOTE(review): this report sits under `if self.errors`, so it is
        # only reached if `_verify_dirstate` also adds its findings to
        # `self.errors` -- confirm against that method.
        if dirstate_errors:
            ui.warn(
                _(b"dirstate inconsistent with current parent's manifest\n")
            )
            ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
        return 1
    return 0
243
243
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revision),
    - each revision can be read.

    The function returns some of the data observed in the changesets as a
    (mflinkrevs, filelinkrevs) tuple:
    - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

    If a matcher was specified, filelinkrevs will only contain matched
    files.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    # keep the changelog open for reading for the whole loop
    with cl.reading():
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                # changes[0] is the manifest node, changes[3] the files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may still refer to a manifest
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
291
292
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted from it as nodes are found
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectory exists in the sub-directory manifest
    * each manifest entry has a file path
    * each manifest node referred to in mflinkrevs exists in the manifest
      log

    If tree manifest is in use and a matcher is specified, only the
    sub-directories matching it will be verified.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every file in the
    repository. In addition, when tree-manifest is used, it also contains
    sub-directory entries.

    If a matcher is provided, only matching paths will be included.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        # files backing this sub-directory manifest are not orphans
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for i in mf:
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            del mflinkrevs[n]
        elif dir:
            msg = _(b"%s not in parent-directory manifest") % short(n)
            self._err(lr, msg, label)
        else:
            self._err(lr, _(b"%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # sub-directory entry: queue it for the recursive pass
                    if not match.visitdir(fullpath):
                        continue
                    sdn = subdirnodes.setdefault(fullpath + b'/', {})
                    sdn.setdefault(fn, []).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfdelta = mfl.get(dir, n).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(n)
                self._exc(lr, msg, inst, label)

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
        for c, m in sorted(changesetpairs):
            if dir:
                self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
            else:
                msg = _(b"changeset refers to unknown revision %s")
                msg %= short(m)
                self._err(c, msg, label)

    if not dir and subdirnodes:
        # root call only: collect the `meta/` store files so the recursive
        # calls can tick off the ones that belong to a known manifest
        self.ui.status(_(b"checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        undecodable = []
        for entry in repo.store.data_entries(undecodable=undecodable):
            for file_ in entry.files():
                f = file_.unencoded_path
                size = file_.file_size(repo.store.vfs)
                if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
        for f in undecodable:
            self._err(None, _(b"cannot decode filename '%s'") % f)
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in subdirnodes.items():
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in subdirfilenodes.items():
            filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            # whatever is left was not claimed by any directory manifest
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
439
440
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """cross-check the files seen in changesets against those in manifests

    - filelinkrevs: { file-path -> [changelog-rev] } as gathered from the
                    changelog
    - filenodes:    { file-path -> { filenode -> changelog-rev } } as
                    gathered from the manifests

    An error is recorded for every path present in one mapping but not in
    the other. Nothing is returned.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    progress = ui.makeprogress(
        _(b'crosschecking'), unit=_(b'files'), total=total
    )
    if self.havemf:
        for f in sorted(filelinkrevs):
            progress.increment()
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                self._err(lr, _(b"in changeset but not in manifest"), f)

    if self.havecl:
        for f in sorted(filenodes):
            progress.increment()
            if f not in filelinkrevs:
                try:
                    # attribute the error to the earliest linkrev recorded
                    # in the filelog for the nodes the manifests mention
                    fl = repo.file(f)
                    lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[f])
                except Exception:
                    lr = None
                self._err(lr, _(b"in manifest but not in changeset"), f)

    progress.complete()
468
469
469 def _verifyfiles(self, filenodes, filelinkrevs):
470 def _verifyfiles(self, filenodes, filelinkrevs):
470 repo = self.repo
471 repo = self.repo
471 ui = self.ui
472 ui = self.ui
472 lrugetctx = self.lrugetctx
473 lrugetctx = self.lrugetctx
473 revlogv1 = self.revlogv1
474 revlogv1 = self.revlogv1
474 havemf = self.havemf
475 havemf = self.havemf
475 ui.status(_(b"checking files\n"))
476 ui.status(_(b"checking files\n"))
476
477
477 storefiles = set()
478 storefiles = set()
478 undecodable = []
479 undecodable = []
479 for entry in repo.store.data_entries(undecodable=undecodable):
480 for entry in repo.store.data_entries(undecodable=undecodable):
480 for file_ in entry.files():
481 for file_ in entry.files():
481 size = file_.file_size(repo.store.vfs)
482 size = file_.file_size(repo.store.vfs)
482 f = file_.unencoded_path
483 f = file_.unencoded_path
483 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
484 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
484 storefiles.add(_normpath(f))
485 storefiles.add(_normpath(f))
485 for f in undecodable:
486 for f in undecodable:
486 self._err(None, _(b"cannot decode filename '%s'") % f)
487 self._err(None, _(b"cannot decode filename '%s'") % f)
487
488
488 state = {
489 state = {
489 # TODO this assumes revlog storage for changelog.
490 # TODO this assumes revlog storage for changelog.
490 b'expectedversion': self.repo.changelog._format_version,
491 b'expectedversion': self.repo.changelog._format_version,
491 b'skipflags': self.skipflags,
492 b'skipflags': self.skipflags,
492 # experimental config: censor.policy
493 # experimental config: censor.policy
493 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
494 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
494 }
495 }
495
496
496 files = sorted(set(filenodes) | set(filelinkrevs))
497 files = sorted(set(filenodes) | set(filelinkrevs))
497 revisions = 0
498 revisions = 0
498 progress = ui.makeprogress(
499 progress = ui.makeprogress(
499 _(b'checking'), unit=_(b'files'), total=len(files)
500 _(b'checking'), unit=_(b'files'), total=len(files)
500 )
501 )
501 for i, f in enumerate(files):
502 for i, f in enumerate(files):
502 progress.update(i, item=f)
503 progress.update(i, item=f)
503 try:
504 try:
504 linkrevs = filelinkrevs[f]
505 linkrevs = filelinkrevs[f]
505 except KeyError:
506 except KeyError:
506 # in manifest but not in changelog
507 # in manifest but not in changelog
507 linkrevs = []
508 linkrevs = []
508
509
509 if linkrevs:
510 if linkrevs:
510 lr = linkrevs[0]
511 lr = linkrevs[0]
511 else:
512 else:
512 lr = None
513 lr = None
513
514
514 try:
515 try:
515 fl = repo.file(f)
516 fl = repo.file(f)
516 except error.StorageError as e:
517 except error.StorageError as e:
517 self._err(lr, _(b"broken revlog! (%s)") % e, f)
518 self._err(lr, _(b"broken revlog! (%s)") % e, f)
518 continue
519 continue
519
520
520 for ff in fl.files():
521 for ff in fl.files():
521 try:
522 try:
522 storefiles.remove(ff)
523 storefiles.remove(ff)
523 except KeyError:
524 except KeyError:
524 if self.warnorphanstorefiles:
525 if self.warnorphanstorefiles:
525 msg = _(b" warning: revlog '%s' not in fncache!")
526 msg = _(b" warning: revlog '%s' not in fncache!")
526 self._warn(msg % ff)
527 self._warn(msg % ff)
527 self.fncachewarned = True
528 self.fncachewarned = True
528
529
529 if not len(fl) and (self.havecl or self.havemf):
530 if not len(fl) and (self.havecl or self.havemf):
530 self._err(lr, _(b"empty or missing %s") % f)
531 self._err(lr, _(b"empty or missing %s") % f)
531 else:
532 else:
532 # Guard against implementations not setting this.
533 # Guard against implementations not setting this.
533 state[b'skipread'] = set()
534 state[b'skipread'] = set()
534 state[b'safe_renamed'] = set()
535 state[b'safe_renamed'] = set()
535
536
536 for problem in fl.verifyintegrity(state):
537 for problem in fl.verifyintegrity(state):
537 if problem.node is not None:
538 if problem.node is not None:
538 linkrev = fl.linkrev(fl.rev(problem.node))
539 linkrev = fl.linkrev(fl.rev(problem.node))
539 else:
540 else:
540 linkrev = None
541 linkrev = None
541
542
542 if problem.warning:
543 if problem.warning:
543 self._warn(problem.warning)
544 self._warn(problem.warning)
544 elif problem.error:
545 elif problem.error:
545 linkrev_msg = linkrev if linkrev is not None else lr
546 linkrev_msg = linkrev if linkrev is not None else lr
546 self._err(linkrev_msg, problem.error, f)
547 self._err(linkrev_msg, problem.error, f)
547 else:
548 else:
548 raise error.ProgrammingError(
549 raise error.ProgrammingError(
549 b'problem instance does not set warning or error '
550 b'problem instance does not set warning or error '
550 b'attribute: %s' % problem.msg
551 b'attribute: %s' % problem.msg
551 )
552 )
552
553
553 seen = {}
554 seen = {}
554 for i in fl:
555 for i in fl:
555 revisions += 1
556 revisions += 1
556 n = fl.node(i)
557 n = fl.node(i)
557 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
558 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
558 if f in filenodes:
559 if f in filenodes:
559 if havemf and n not in filenodes[f]:
560 if havemf and n not in filenodes[f]:
560 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
561 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
561 else:
562 else:
562 del filenodes[f][n]
563 del filenodes[f][n]
563
564
564 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
565 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
565 continue
566 continue
566
567
567 # check renames
568 # check renames
568 try:
569 try:
569 # This requires resolving fulltext (at least on revlogs,
570 # This requires resolving fulltext (at least on revlogs,
570 # though not with LFS revisions). We may want
571 # though not with LFS revisions). We may want
571 # ``verifyintegrity()`` to pass a set of nodes with
572 # ``verifyintegrity()`` to pass a set of nodes with
572 # rename metadata as an optimization.
573 # rename metadata as an optimization.
573 rp = fl.renamed(n)
574 rp = fl.renamed(n)
574 if rp:
575 if rp:
575 if lr is not None and ui.verbose:
576 if lr is not None and ui.verbose:
576 ctx = lrugetctx(lr)
577 ctx = lrugetctx(lr)
577 if not any(rp[0] in pctx for pctx in ctx.parents()):
578 if not any(rp[0] in pctx for pctx in ctx.parents()):
578 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
579 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
579 fl2 = repo.file(rp[0])
580 fl2 = repo.file(rp[0])
580 if not len(fl2):
581 if not len(fl2):
581 m = _(b"empty or missing copy source revlog %s:%s")
582 m = _(b"empty or missing copy source revlog %s:%s")
582 self._err(lr, m % (rp[0], short(rp[1])), f)
583 self._err(lr, m % (rp[0], short(rp[1])), f)
583 elif rp[1] == self.repo.nullid:
584 elif rp[1] == self.repo.nullid:
584 msg = WARN_NULLID_COPY_SOURCE
585 msg = WARN_NULLID_COPY_SOURCE
585 msg %= (f, lr, rp[0], short(rp[1]))
586 msg %= (f, lr, rp[0], short(rp[1]))
586 ui.note(msg)
587 ui.note(msg)
587 else:
588 else:
588 fl2.rev(rp[1])
589 fl2.rev(rp[1])
589 except Exception as inst:
590 except Exception as inst:
590 self._exc(
591 self._exc(
591 lr, _(b"checking rename of %s") % short(n), inst, f
592 lr, _(b"checking rename of %s") % short(n), inst, f
592 )
593 )
593
594
594 # cross-check
595 # cross-check
595 if f in filenodes:
596 if f in filenodes:
596 fns = [(v, k) for k, v in filenodes[f].items()]
597 fns = [(v, k) for k, v in filenodes[f].items()]
597 for lr, node in sorted(fns):
598 for lr, node in sorted(fns):
598 msg = _(b"manifest refers to unknown revision %s")
599 msg = _(b"manifest refers to unknown revision %s")
599 self._err(lr, msg % short(node), f)
600 self._err(lr, msg % short(node), f)
600 progress.complete()
601 progress.complete()
601
602
602 if self.warnorphanstorefiles:
603 if self.warnorphanstorefiles:
603 for f in sorted(storefiles):
604 for f in sorted(storefiles):
604 self._warn(_(b"warning: orphan data file '%s'") % f)
605 self._warn(_(b"warning: orphan data file '%s'") % f)
605
606
606 return len(files), revisions
607 return len(files), revisions
607
608
def _verify_dirstate(self):
    """Cross-check the dirstate against the manifests of its parents.

    Every problem yielded by ``repo.dirstate.verify()`` is sent to
    ``ui.error()``.  The number of problems found is added to
    ``self.errors`` and is also the return value.
    """
    repo, ui = self.repo, self.ui
    ui.status(_(b"checking dirstate\n"))

    p1, p2 = repo.dirstate.parents()
    manifest1 = repo[p1].manifest()
    manifest2 = repo[p2].manifest()

    # A matcher is only handed to the dirstate check when the repository
    # carries the narrow requirement; otherwise None is passed.
    if requirements.NARROW_REQUIREMENT in repo.requirements:
        matcher = repo.narrowmatch()
    else:
        matcher = None

    # enumerate() starting at 1 leaves ``found`` equal to the number of
    # reported problems; it stays 0 when nothing is yielded.
    found = 0
    problems = repo.dirstate.verify(manifest1, manifest2, p1, matcher)
    for found, problem in enumerate(problems, 1):
        ui.error(problem)

    if found:
        self.errors += found
    return found
General Comments 0
You need to be logged in to leave comments. Login now