##// END OF EJS Templates
typing: add type hints to `mercurial.verify._normpath()`...
Matt Harbison -
r52611:45828bc3 default
parent child Browse files
Show More
@@ -1,629 +1,629 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10
10
11 from .i18n import _
11 from .i18n import _
12 from .node import short
12 from .node import short
13 from .utils import stringutil
13 from .utils import stringutil
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 requirements,
18 requirements,
19 revlog,
19 revlog,
20 transaction,
20 transaction,
21 util,
21 util,
22 )
22 )
23
23
# Verification levels understood by ``verifier``.
# VERIFY_DEFAULT is the level used when the caller passes no level.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) every full manifest text is also read back
# from storage while checking manifests.
VERIFY_FULL = 1
26
26
27
27
def verify(repo, level=None):
    """Run a full verification of ``repo`` while holding its lock.

    ``level`` is forwarded untouched to ``verifier``.  The value returned
    is whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        return verifier(repo, level).verify()
32
32
33
33
34 def _normpath(f):
34 def _normpath(f: bytes) -> bytes:
35 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # under hg < 2.4, convert didn't sanitize paths properly, so a
36 # converted repo may contain repeated slashes
36 # converted repo may contain repeated slashes
37 while b'//' in f:
37 while b'//' in f:
38 f = f.replace(b'//', b'/')
38 f = f.replace(b'//', b'/')
39 return f
39 return f
40
40
41
41
# Printed at the end of a verify run when a revlog was found to be
# missing from the fncache.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error text for a sub-directory manifest node that is referenced by the
# parent directory but was never found in the sub-directory manifest log.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)

# Copy-source messages; their users are outside this part of the file.
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)

WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
57
57
58
58
59 class verifier:
59 class verifier:
def __init__(self, repo, level=None):
    """Capture the repository and reset all verification state.

    ``level`` selects how thorough the run is; ``None`` means
    ``VERIFY_DEFAULT``.
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # bookkeeping updated by _warn() / _err() while the checks run
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # what the repository contains and which revlog format it uses
    self.havecl = len(repo.changelog) > 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
    self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)

    # flags flipped as the individual verification phases progress
    self.refersmf = False
    self.fncachewarned = False
    self.warnorphanstorefiles = True

    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
79
79
80 def _warn(self, msg):
80 def _warn(self, msg):
81 """record a "warning" level issue"""
81 """record a "warning" level issue"""
82 self.ui.warn(msg + b"\n")
82 self.ui.warn(msg + b"\n")
83 self.warnings += 1
83 self.warnings += 1
84
84
85 def _err(self, linkrev, msg, filename=None):
85 def _err(self, linkrev, msg, filename=None):
86 """record a "error" level issue"""
86 """record a "error" level issue"""
87 if linkrev is not None:
87 if linkrev is not None:
88 self.badrevs.add(linkrev)
88 self.badrevs.add(linkrev)
89 linkrev = b"%d" % linkrev
89 linkrev = b"%d" % linkrev
90 else:
90 else:
91 linkrev = b'?'
91 linkrev = b'?'
92 msg = b"%s: %s" % (linkrev, msg)
92 msg = b"%s: %s" % (linkrev, msg)
93 if filename:
93 if filename:
94 msg = b"%s@%s" % (filename, msg)
94 msg = b"%s@%s" % (filename, msg)
95 self.ui.warn(b" " + msg + b"\n")
95 self.ui.warn(b" " + msg + b"\n")
96 self.errors += 1
96 self.errors += 1
97
97
def _exc(self, linkrev, msg, inst, filename=None):
    """Report exception ``inst`` raised during verification as an error.

    The exception is rendered as bytes; if that rendering is empty, its
    byte repr is used instead.  The result is appended to ``msg``.
    """
    detail = stringutil.forcebytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, detail), filename)
104
104
def _checkrevlog(self, obj, name, linkrev):
    """Run the revlog-wide sanity checks on ``obj``.

    Checks, in order:

    - the revlog is present and non-empty (only an error when the
      repository has a changelog or a manifest),
    - the data and index sizes reported by ``obj.checksize()`` are right,
    - the revlog's format version agrees with the changelog's.

    ``name`` labels the revlog in messages; ``linkrev`` is only attached
    to the "empty or missing" error.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    sizes = obj.checksize()
    if sizes[0]:
        msg = _(b"data length off by %d bytes") % sizes[0]
        self._err(None, msg, name)
    if sizes[1]:
        msg = _(b"index contains %d extra bytes") % sizes[1]
        self._err(None, msg, name)

    # warn when this revlog's format differs from the changelog's
    is_v0 = obj._format_version == revlog.REVLOGV0
    if is_v0:
        if self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)
    elif not self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 1") % name)
128
128
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - linkrev points to the lowest of these changesets,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    ``seen`` is updated in place with ``node -> i``.

    Return the linkrev of the revision, or None when the stored linkrev
    was found to be wrong and therefore cannot be trusted.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs.  The
                    # filtered list is built from the original candidates
                    # and only bound once complete, so a failing lookup
                    # leaves the unfiltered candidates in place.
                    linkrevs = [
                        rev
                        for rev in linkrevs
                        if self.lrugetctx(rev)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: report the unfiltered candidates
                    pass
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
186
186
def verify(self):
    """Verify the content of the Mercurial repository.

    Runs every verification phase in turn (changelog, manifests,
    changeset/manifest cross-check, files, then the dirstate if nothing
    failed before), reporting issues as they are found.

    Raises ``error.Abort`` when the repository URL is not a ``file:`` one.

    Returns 1 if any error has been encountered, 0 otherwise.
    """
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    if transaction.has_abandoned_transaction(repo):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        version = 1 if self.revlogv1 else 0
        ui.status(_(b"repository uses revlog format %d\n") % version)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    if not self.errors:
        dirstate_errors = self._verify_dirstate()
    else:
        ui.warn(_(b"not checking dirstate because of previous errors\n"))
        dirstate_errors = 0

    # final report
    summary = _(b"checked %d changesets with %d changes to %d files\n")
    ui.status(summary % (len(repo.changelog), filerevisions, totalfiles))
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)

    if not self.errors:
        return 0

    ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        msg = _(b"(first damaged changeset appears to be %d)\n")
        msg %= min(self.badrevs)
        ui.warn(msg)
    if dirstate_errors:
        ui.warn(
            _(b"dirstate inconsistent with current parent's manifest\n")
        )
        ui.warn(_(b"%d dirstate errors\n") % dirstate_errors)
    return 1
243
243
def _verifychangelog(self):
    """Verify the changelog of the repository.

    The following checks are performed:
    - all of the `_checkrevlog` checks,
    - all of the `_checkentry` checks (for each revision),
    - each revision can be read.

    Returns a ``(mflinkrevs, filelinkrevs)`` tuple built from the data
    observed in the changesets:
    - mflinkrevs:   a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: a { file-path -> [changelog-rev] } mapping

    Only files accepted by ``self.match`` are recorded in filelinkrevs.
    """
    ui = self.ui
    repo = self.repo
    matcher = self.match
    changelog = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(changelog, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    with changelog.reading():
        for rev in repo:
            progress.update(rev)
            node = changelog.node(rev)
            self._checkentry(
                changelog, rev, node, seen, [rev], b"changelog"
            )

            try:
                changes = changelog.read(node)
                # changes[0] is the manifest node, changes[3] the files
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(rev)
                    self.refersmf = True
                for path in changes[3]:
                    if not matcher(path):
                        continue
                    filelinkrevs.setdefault(_normpath(path), []).append(rev)
            except Exception as inst:
                # an unreadable changeset may well refer to a manifest
                self.refersmf = True
                msg = _(b"unpacking changeset %s") % short(node)
                self._exc(rev, msg, inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
292
292
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """Verify the manifest log, recursing into directory manifests.

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted from it as nodes are found
    - dir:            the sub-directory to check (tree manifest repos),
                      empty for the root manifest
    - storefiles:     set of currently "orphan" store files
    - subdirprogress: a progress object shared by the sub-directory calls

    This function checks:
    * all of the `_checkrevlog` checks (for every manifest revlog)
    * all of the `_checkentry` checks (for every manifest revision)
    * sub-directory nodes exist in the sub-directory manifest
    * every manifest entry has a file path
    * every manifest node referenced in mflinkrevs exists in the
      manifest log

    Only paths (and, for tree manifests, sub-directories) accepted by
    ``self.match`` are visited.

    Returns a two level mapping:
        {"path" -> {filenode -> changelog-revision}}

    It holds an entry for every matching file found here and, through
    the recursive calls, in the sub-directory manifests.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)
    in_subdir = bool(dir)

    filenodes = {}
    subdirnodes = {}
    seen = {}
    if in_subdir:
        label = dir
        # this directory's revlog files are accounted for, not orphans
        storefiles.difference_update(mf.files())
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    else:
        label = b"manifest"
        ui.status(_(b"checking manifests\n"))

    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for rev in mf:
        if not in_subdir:
            progress.update(rev)
        node = mf.node(rev)
        lr = self._checkentry(
            mf, rev, node, seen, mflinkrevs.get(node, []), label
        )
        if node in mflinkrevs:
            del mflinkrevs[node]
        elif in_subdir:
            msg = _(b"%s not in parent-directory manifest") % short(node)
            self._err(lr, msg, label)
        else:
            self._err(lr, _(b"%s not in changesets") % short(node), label)

        try:
            mfdelta = mfl.get(dir, node).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # tree entry: remember the sub-directory node
                    if match.visitdir(fullpath):
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                elif match(fullpath):
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(node), inst, label)

        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfl.get(dir, node).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(node)
                self._exc(lr, msg, inst, label)

    if not in_subdir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        missing = sorted(
            (c, m) for m, revs in mflinkrevs.items() for c in revs
        )
        for c, m in missing:
            if in_subdir:
                self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
            else:
                msg = _(b"changeset refers to unknown revision %s")
                msg %= short(m)
                self._err(c, msg, label)

    at_root_with_subdirs = not in_subdir and bool(subdirnodes)

    if at_root_with_subdirs:
        ui.status(_(b"checking directory manifests\n"))
        # collect the directory-manifest files present in the store
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        undecodable = []
        for entry in repo.store.data_entries(undecodable=undecodable):
            for file_ in entry.files():
                f = file_.unencoded_path
                size = file_.file_size(repo.store.vfs)
                if (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
        for f in undecodable:
            self._err(None, _(b"cannot decode filename '%s'") % f)
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in subdirnodes.items():
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in subdirfilenodes.items():
            filenodes.setdefault(f, {}).update(onefilenodes)

    if at_root_with_subdirs:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            # whatever no directory manifest claimed is an orphan
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
440
440
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """Check that changesets and manifests agree on the set of files.

    - filelinkrevs: { file-path -> [changelog-rev] } from the changelog
    - filenodes:    { file-path -> { filenode -> changelog-rev } } from
                    the manifests

    A file present on only one side is reported as an error.  Each
    direction is only checked when the repository has the other side's
    data (a manifest, respectively a changelog).
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    progress = ui.makeprogress(
        _(b'crosschecking'),
        unit=_(b'files'),
        total=len(filelinkrevs) + len(filenodes),
    )

    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            first_rev = filelinkrevs[path][0]
            self._err(
                first_rev, _(b"in changeset but not in manifest"), path
            )

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                # blame the earliest changeset linked from the filelog
                fl = repo.file(path)
                lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[path])
            except Exception:
                lr = None
            self._err(lr, _(b"in manifest but not in changeset"), path)

    progress.complete()
469
469
470 def _verifyfiles(self, filenodes, filelinkrevs):
470 def _verifyfiles(self, filenodes, filelinkrevs):
471 repo = self.repo
471 repo = self.repo
472 ui = self.ui
472 ui = self.ui
473 lrugetctx = self.lrugetctx
473 lrugetctx = self.lrugetctx
474 revlogv1 = self.revlogv1
474 revlogv1 = self.revlogv1
475 havemf = self.havemf
475 havemf = self.havemf
476 ui.status(_(b"checking files\n"))
476 ui.status(_(b"checking files\n"))
477
477
478 storefiles = set()
478 storefiles = set()
479 undecodable = []
479 undecodable = []
480 for entry in repo.store.data_entries(undecodable=undecodable):
480 for entry in repo.store.data_entries(undecodable=undecodable):
481 for file_ in entry.files():
481 for file_ in entry.files():
482 size = file_.file_size(repo.store.vfs)
482 size = file_.file_size(repo.store.vfs)
483 f = file_.unencoded_path
483 f = file_.unencoded_path
484 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
484 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
485 storefiles.add(_normpath(f))
485 storefiles.add(_normpath(f))
486 for f in undecodable:
486 for f in undecodable:
487 self._err(None, _(b"cannot decode filename '%s'") % f)
487 self._err(None, _(b"cannot decode filename '%s'") % f)
488
488
489 state = {
489 state = {
490 # TODO this assumes revlog storage for changelog.
490 # TODO this assumes revlog storage for changelog.
491 b'expectedversion': self.repo.changelog._format_version,
491 b'expectedversion': self.repo.changelog._format_version,
492 b'skipflags': self.skipflags,
492 b'skipflags': self.skipflags,
493 # experimental config: censor.policy
493 # experimental config: censor.policy
494 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
494 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
495 }
495 }
496
496
497 files = sorted(set(filenodes) | set(filelinkrevs))
497 files = sorted(set(filenodes) | set(filelinkrevs))
498 revisions = 0
498 revisions = 0
499 progress = ui.makeprogress(
499 progress = ui.makeprogress(
500 _(b'checking'), unit=_(b'files'), total=len(files)
500 _(b'checking'), unit=_(b'files'), total=len(files)
501 )
501 )
502 for i, f in enumerate(files):
502 for i, f in enumerate(files):
503 progress.update(i, item=f)
503 progress.update(i, item=f)
504 try:
504 try:
505 linkrevs = filelinkrevs[f]
505 linkrevs = filelinkrevs[f]
506 except KeyError:
506 except KeyError:
507 # in manifest but not in changelog
507 # in manifest but not in changelog
508 linkrevs = []
508 linkrevs = []
509
509
510 if linkrevs:
510 if linkrevs:
511 lr = linkrevs[0]
511 lr = linkrevs[0]
512 else:
512 else:
513 lr = None
513 lr = None
514
514
515 try:
515 try:
516 fl = repo.file(f)
516 fl = repo.file(f)
517 except error.StorageError as e:
517 except error.StorageError as e:
518 self._err(lr, _(b"broken revlog! (%s)") % e, f)
518 self._err(lr, _(b"broken revlog! (%s)") % e, f)
519 continue
519 continue
520
520
521 for ff in fl.files():
521 for ff in fl.files():
522 try:
522 try:
523 storefiles.remove(ff)
523 storefiles.remove(ff)
524 except KeyError:
524 except KeyError:
525 if self.warnorphanstorefiles:
525 if self.warnorphanstorefiles:
526 msg = _(b" warning: revlog '%s' not in fncache!")
526 msg = _(b" warning: revlog '%s' not in fncache!")
527 self._warn(msg % ff)
527 self._warn(msg % ff)
528 self.fncachewarned = True
528 self.fncachewarned = True
529
529
530 if not len(fl) and (self.havecl or self.havemf):
530 if not len(fl) and (self.havecl or self.havemf):
531 self._err(lr, _(b"empty or missing %s") % f)
531 self._err(lr, _(b"empty or missing %s") % f)
532 else:
532 else:
533 # Guard against implementations not setting this.
533 # Guard against implementations not setting this.
534 state[b'skipread'] = set()
534 state[b'skipread'] = set()
535 state[b'safe_renamed'] = set()
535 state[b'safe_renamed'] = set()
536
536
537 for problem in fl.verifyintegrity(state):
537 for problem in fl.verifyintegrity(state):
538 if problem.node is not None:
538 if problem.node is not None:
539 linkrev = fl.linkrev(fl.rev(problem.node))
539 linkrev = fl.linkrev(fl.rev(problem.node))
540 else:
540 else:
541 linkrev = None
541 linkrev = None
542
542
543 if problem.warning:
543 if problem.warning:
544 self._warn(problem.warning)
544 self._warn(problem.warning)
545 elif problem.error:
545 elif problem.error:
546 linkrev_msg = linkrev if linkrev is not None else lr
546 linkrev_msg = linkrev if linkrev is not None else lr
547 self._err(linkrev_msg, problem.error, f)
547 self._err(linkrev_msg, problem.error, f)
548 else:
548 else:
549 raise error.ProgrammingError(
549 raise error.ProgrammingError(
550 b'problem instance does not set warning or error '
550 b'problem instance does not set warning or error '
551 b'attribute: %s' % problem.msg
551 b'attribute: %s' % problem.msg
552 )
552 )
553
553
554 seen = {}
554 seen = {}
555 for i in fl:
555 for i in fl:
556 revisions += 1
556 revisions += 1
557 n = fl.node(i)
557 n = fl.node(i)
558 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
558 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
559 if f in filenodes:
559 if f in filenodes:
560 if havemf and n not in filenodes[f]:
560 if havemf and n not in filenodes[f]:
561 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
561 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
562 else:
562 else:
563 del filenodes[f][n]
563 del filenodes[f][n]
564
564
565 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
565 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
566 continue
566 continue
567
567
568 # check renames
568 # check renames
569 try:
569 try:
570 # This requires resolving fulltext (at least on revlogs,
570 # This requires resolving fulltext (at least on revlogs,
571 # though not with LFS revisions). We may want
571 # though not with LFS revisions). We may want
572 # ``verifyintegrity()`` to pass a set of nodes with
572 # ``verifyintegrity()`` to pass a set of nodes with
573 # rename metadata as an optimization.
573 # rename metadata as an optimization.
574 rp = fl.renamed(n)
574 rp = fl.renamed(n)
575 if rp:
575 if rp:
576 if lr is not None and ui.verbose:
576 if lr is not None and ui.verbose:
577 ctx = lrugetctx(lr)
577 ctx = lrugetctx(lr)
578 if not any(rp[0] in pctx for pctx in ctx.parents()):
578 if not any(rp[0] in pctx for pctx in ctx.parents()):
579 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
579 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
580 fl2 = repo.file(rp[0])
580 fl2 = repo.file(rp[0])
581 if not len(fl2):
581 if not len(fl2):
582 m = _(b"empty or missing copy source revlog %s:%s")
582 m = _(b"empty or missing copy source revlog %s:%s")
583 self._err(lr, m % (rp[0], short(rp[1])), f)
583 self._err(lr, m % (rp[0], short(rp[1])), f)
584 elif rp[1] == self.repo.nullid:
584 elif rp[1] == self.repo.nullid:
585 msg = WARN_NULLID_COPY_SOURCE
585 msg = WARN_NULLID_COPY_SOURCE
586 msg %= (f, lr, rp[0], short(rp[1]))
586 msg %= (f, lr, rp[0], short(rp[1]))
587 ui.note(msg)
587 ui.note(msg)
588 else:
588 else:
589 fl2.rev(rp[1])
589 fl2.rev(rp[1])
590 except Exception as inst:
590 except Exception as inst:
591 self._exc(
591 self._exc(
592 lr, _(b"checking rename of %s") % short(n), inst, f
592 lr, _(b"checking rename of %s") % short(n), inst, f
593 )
593 )
594
594
595 # cross-check
595 # cross-check
596 if f in filenodes:
596 if f in filenodes:
597 fns = [(v, k) for k, v in filenodes[f].items()]
597 fns = [(v, k) for k, v in filenodes[f].items()]
598 for lr, node in sorted(fns):
598 for lr, node in sorted(fns):
599 msg = _(b"manifest refers to unknown revision %s")
599 msg = _(b"manifest refers to unknown revision %s")
600 self._err(lr, msg % short(node), f)
600 self._err(lr, msg % short(node), f)
601 progress.complete()
601 progress.complete()
602
602
603 if self.warnorphanstorefiles:
603 if self.warnorphanstorefiles:
604 for f in sorted(storefiles):
604 for f in sorted(storefiles):
605 self._warn(_(b"warning: orphan data file '%s'") % f)
605 self._warn(_(b"warning: orphan data file '%s'") % f)
606
606
607 return len(files), revisions
607 return len(files), revisions
608
608
def _verify_dirstate(self) -> int:
    """Check that the dirstate is consistent with the parent's manifest

    Announces the check through ``ui.status()``, then asks
    ``repo.dirstate.verify()`` to compare the dirstate against the
    manifests of both dirstate parents.  Every problem it yields is
    printed with ``ui.error()`` and counted.

    The count is added to ``self.errors`` and also returned, so the
    result is ``0`` when no problem was reported.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"checking dirstate\n"))

    parent1, parent2 = repo.dirstate.parents()
    m1 = repo[parent1].manifest()
    m2 = repo[parent2].manifest()
    dirstate_errors = 0

    # A narrow matcher is only built when the repository carries the
    # narrow requirement; otherwise ``None`` is handed to the dirstate.
    is_narrow = requirements.NARROW_REQUIREMENT in repo.requirements
    narrow_matcher = repo.narrowmatch() if is_narrow else None

    for err in repo.dirstate.verify(m1, m2, parent1, narrow_matcher):
        ui.error(err)
        dirstate_errors += 1

    # Fold the local count into the verifier-wide error total.
    if dirstate_errors:
        self.errors += dirstate_errors
    return dirstate_errors
General Comments 0
You need to be logged in to leave comments. Login now