verify: use some intermediate variables instead of a multi-liner...
marmoute - r48151:fb438539 default
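For context, the change below swaps a single chained call spread across three lines for a named intermediate variable. A minimal sketch of the pattern (illustrative only; `mapping`, `key`, `subkey`, and `value` are placeholder names, not identifiers from the patch):

    # before: one chained, wrapped statement
    mapping.setdefault(key + b'/', {}).setdefault(subkey, []).append(value)

    # after: an intermediate variable, one short statement per line
    inner = mapping.setdefault(key + b'/', {})
    inner.setdefault(subkey, []).append(value)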
@@ -1,611 +1,610 @@
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import short
from .utils import stringutil

from . import (
    error,
    pycompat,
    revlog,
    util,
)

VERIFY_DEFAULT = 0
VERIFY_FULL = 1


def verify(repo, level=None):
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()


def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f


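# Editor's note (illustrative, not part of the original file): _normpath()
# repeatedly collapses doubled slashes, so e.g. _normpath(b'foo//bar///baz')
# returns b'foo/bar/baz'.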
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)


class verifier(object):
    def __init__(self, repo, level=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record an "error" level issue"""
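        # Editor's note (illustrative, not part of the original file): for
        # example, _err(7, b"broken revlog!", b"a.txt") records changelog
        # revision 7 as bad and prints " a.txt@7: broken revlog!".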
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record an exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level properties of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj: the source revlog
        - i: the revision number
        - node: the revision node id
        - seen: nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f: string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
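        # Editor's note (illustrative, not part of the original file): if a
        # file revision is introduced by changesets 4 and 7, linkrevs is
        # [4, 7] and the entry's stored linkrev is expected to be 4, the
        # lowest of them.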
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [
                            l
                            for l in linkrevs
                            if self.lrugetctx(l)[f].filenode() == node
                        ]
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are found.

        return 1 if any errors have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
            if self.fncachewarned:
                ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revision),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuple:
        - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contain matched
        files.
        """
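        # Editor's note (illustrative, not part of the original file): the two
        # returned mappings look roughly like
        #   mflinkrevs   = {<manifest node>: [0, 3], ...}
        #   filelinkrevs = {b'path/to/file': [1, 4], ...}
        # i.e. each key maps to the changelog revisions that reference it.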
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
        - dir: a subdirectory to check (for tree manifest repo)
        - storefiles: set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectories exist in the sub-directory manifest
        * each manifest entry has a file path
        * each manifest node referred to in mflinkrevs exists in the manifest log

        If tree manifest is in use and a matcher is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every file in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
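        # Editor's note (illustrative, not part of the original file): the
        # returned mapping is shaped like
        #   {b'path/to/file': {<filenode>: <changelog rev>, ...}, ...}
        # with additional sub-directory entries when tree manifests are used.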
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
-                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
-                            fn, []
-                        ).append(lr)
+                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
+                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifests. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filenames are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entries in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }
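        # Editor's note (illustrative, not part of the original file): this
        # `state` mapping is handed to each filelog's verifyintegrity() call
        # below; among other things it tells the storage layer whether
        # censored revisions should be reported as errors (b'erroroncensored')
        # and which flagged revisions may skip full-text checks (b'skipflags').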

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions