# verify.py (changeset r47523, stable branch, Matt Harbison)
# verify: convert an exception to bytes before logging
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
from __future__ import absolute_import

import os

from .i18n import _
from .node import (
    nullid,
    short,
)
from .utils import (
    stringutil,
)

from . import (
    error,
    pycompat,
    revlog,
    util,
)
24
27
# Verification levels: DEFAULT runs the standard integrity checks; FULL
# additionally reads every manifest fulltext back from storage.
VERIFY_DEFAULT = 0
VERIFY_FULL = 1
27
30
28
31
def verify(repo, level=None):
    """Check the integrity of *repo*, holding its lock for the duration.

    ``level`` selects the verification depth (``VERIFY_DEFAULT`` when None).
    Returns 1 if any error was encountered, 0 otherwise.
    """
    with repo.lock():
        return verifier(repo, level).verify()
33
36
34
37
def _normpath(f):
    """Collapse repeated slashes in the byte path ``f``.

    Under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f
41
44
42
45
43 class verifier(object):
46 class verifier(object):
def __init__(self, repo, level=None):
    """Prepare a verifier over *repo* at the given verification *level*."""
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    if level is None:
        level = VERIFY_DEFAULT
    self._level = level
    # accounting for the final report
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0
    # whether the changelog / manifest log have any entries at all
    self.havecl = len(repo.changelog) > 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
    self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
    self.refersmf = False
    self.fncachewarned = False
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
63
66
def _warn(self, msg):
    """record a "warning" level issue"""
    self.ui.warn(msg + b"\n")
    self.warnings += 1
68
71
def _err(self, linkrev, msg, filename=None):
    """record an "error" level issue

    ``linkrev`` (when known) is remembered in ``badrevs`` so the final
    report can point at the first damaged changeset.
    """
    if linkrev is None:
        revtag = b'?'
    else:
        self.badrevs.add(linkrev)
        revtag = b"%d" % linkrev
    msg = b"%s: %s" % (revtag, msg)
    if filename:
        msg = b"%s@%s" % (filename, msg)
    self.ui.warn(b" " + msg + b"\n")
    self.errors += 1
81
84
def _exc(self, linkrev, msg, inst, filename=None):
    """record exception raised during the verify process

    The exception is converted to bytes with forcebytestr() rather than
    pycompat.bytestr(): on Python 3 the exception message is a str that
    may contain arbitrary characters, which bytestr() cannot safely
    encode.  Fall back to the exception's repr when it stringifies to an
    empty message so the report is never blank.
    """
    fmsg = stringutil.forcebytestr(inst)
    if not fmsg:
        fmsg = pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
88
91
def _checkrevlog(self, obj, name, linkrev):
    """verify high level property of a revlog

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    # checksize() reports (data-length delta, extra index bytes)
    datadiff, indexdiff = obj.checksize()
    if datadiff:
        self._err(None, _(b"data length off by %d bytes") % datadiff, name)
    if indexdiff:
        self._err(
            None, _(b"index contains %d extra bytes") % indexdiff, name
        )

    # warn when this revlog's format disagrees with the changelog's
    if obj.version != revlog.REVLOGV0:
        if not self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 1") % name)
    elif self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
112
115
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - linkrev points to the lowest of these changesets,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    Return the linkrev of the revision (or None for changelog's revisions).
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    linkrevs = [
                        l
                        for l in linkrevs
                        if self.lrugetctx(l)[f].filenode() == node
                    ]
                except Exception:
                    pass
            self._warn(
                _(b" (expected %s)")
                % b" ".join(map(pycompat.bytestr, linkrevs))
            )
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != nullid:
            self._err(
                lr,
                _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
                f,
            )
        if p2 not in seen and p2 != nullid:
            self._err(
                lr,
                _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
                f,
            )
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
178
181
def verify(self):
    """verify the content of the Mercurial repository

    This method run all verifications, displaying issues as they are found.

    return 1 if any error have been encountered, 0 otherwise."""
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    # a leftover journal means a transaction was interrupted
    if os.path.exists(repo.sjoin(b"journal")):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        ui.status(
            _(b"repository uses revlog format %d\n")
            % (self.revlogv1 and 1 or 0)
        )

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    ui.status(
        _(b"checked %d changesets with %d changes to %d files\n")
        % (len(repo.changelog), filerevisions, totalfiles)
    )
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(
            _(
                b'hint: run "hg debugrebuildfncache" to recover from '
                b'corrupt fncache\n'
            )
        )
    if self.errors:
        ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
        if self.badrevs:
            ui.warn(
                _(b"(first damaged changeset appears to be %d)\n")
                % min(self.badrevs)
            )
        return 1
    return 0
230
233
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revisions),
    - each revision can be read.

    The function returns some of the data observed in the changesets as a
    (mflinkrevs, filelinkrevs) tuples:
    - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

    If a matcher was specified, filelinkrevs will only contains matched
    files.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    for rev in repo:
        progress.update(rev)
        node = cl.node(rev)
        self._checkentry(cl, rev, node, seen, [rev], b"changelog")

        try:
            changes = cl.read(node)
            # changes[0] is the manifest node, changes[3] the file list
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(rev)
                self.refersmf = True
            for f in changes[3]:
                if match(f):
                    filelinkrevs.setdefault(_normpath(f), []).append(rev)
        except Exception as inst:
            self.refersmf = True
            self._exc(rev, _(b"unpacking changeset %s") % short(node), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
278
281
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectory exists in the sub-directory manifest
    * each manifest entries have a file path
    * each manifest node refered in mflinkrevs exist in the manifest log

    If tree manifest is in use and a matchers is specified, only the
    sub-directories matching it will be verified.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every files in the
    repository. In addition, when tree-manifest is used, it also contains
    sub-directory entries.

    If a matcher is provided, only matching paths will be included.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for i in mf:
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            del mflinkrevs[n]
        elif dir:
            self._err(
                lr,
                _(b"%s not in parent-directory manifest") % short(n),
                label,
            )
        else:
            self._err(lr, _(b"%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # 't' flags a sub-tree entry; recurse into it later
                    if not match.visitdir(fullpath):
                        continue
                    subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                        fn, []
                    ).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfdelta = mfl.get(dir, n).read()
            except Exception as inst:
                self._exc(
                    lr,
                    _(b"reading full manifest %s") % short(n),
                    inst,
                    label,
                )

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
        for c, m in sorted(changesetpairs):
            if dir:
                self._err(
                    c,
                    _(
                        b"parent-directory manifest refers to unknown"
                        b" revision %s"
                    )
                    % short(m),
                    label,
                )
            else:
                self._err(
                    c,
                    _(b"changeset refers to unknown revision %s")
                    % short(m),
                    label,
                )

    if not dir and subdirnodes:
        self.ui.status(_(b"checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
440
443
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """check that the files seen in changesets and in manifests agree

    Reports files referenced by a changeset but absent from every
    manifest, and files present in a manifest but referenced by no
    changeset.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    progress = ui.makeprogress(
        _(b'crosschecking'), unit=_(b'files'), total=total
    )
    if self.havemf:
        for f in sorted(filelinkrevs):
            progress.increment()
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                self._err(lr, _(b"in changeset but not in manifest"), f)

    if self.havecl:
        for f in sorted(filenodes):
            progress.increment()
            if f not in filelinkrevs:
                try:
                    # best effort: locate the lowest linkrev for the report
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    lr = None
                self._err(lr, _(b"in manifest but not in changeset"), f)

    progress.complete()
469
472
470 def _verifyfiles(self, filenodes, filelinkrevs):
473 def _verifyfiles(self, filenodes, filelinkrevs):
471 repo = self.repo
474 repo = self.repo
472 ui = self.ui
475 ui = self.ui
473 lrugetctx = self.lrugetctx
476 lrugetctx = self.lrugetctx
474 revlogv1 = self.revlogv1
477 revlogv1 = self.revlogv1
475 havemf = self.havemf
478 havemf = self.havemf
476 ui.status(_(b"checking files\n"))
479 ui.status(_(b"checking files\n"))
477
480
478 storefiles = set()
481 storefiles = set()
479 for f, f2, size in repo.store.datafiles():
482 for f, f2, size in repo.store.datafiles():
480 if not f:
483 if not f:
481 self._err(None, _(b"cannot decode filename '%s'") % f2)
484 self._err(None, _(b"cannot decode filename '%s'") % f2)
482 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
485 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
483 storefiles.add(_normpath(f))
486 storefiles.add(_normpath(f))
484
487
485 state = {
488 state = {
486 # TODO this assumes revlog storage for changelog.
489 # TODO this assumes revlog storage for changelog.
487 b'expectedversion': self.repo.changelog.version & 0xFFFF,
490 b'expectedversion': self.repo.changelog.version & 0xFFFF,
488 b'skipflags': self.skipflags,
491 b'skipflags': self.skipflags,
489 # experimental config: censor.policy
492 # experimental config: censor.policy
490 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
493 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
491 }
494 }
492
495
493 files = sorted(set(filenodes) | set(filelinkrevs))
496 files = sorted(set(filenodes) | set(filelinkrevs))
494 revisions = 0
497 revisions = 0
495 progress = ui.makeprogress(
498 progress = ui.makeprogress(
496 _(b'checking'), unit=_(b'files'), total=len(files)
499 _(b'checking'), unit=_(b'files'), total=len(files)
497 )
500 )
498 for i, f in enumerate(files):
501 for i, f in enumerate(files):
499 progress.update(i, item=f)
502 progress.update(i, item=f)
500 try:
503 try:
501 linkrevs = filelinkrevs[f]
504 linkrevs = filelinkrevs[f]
502 except KeyError:
505 except KeyError:
503 # in manifest but not in changelog
506 # in manifest but not in changelog
504 linkrevs = []
507 linkrevs = []
505
508
506 if linkrevs:
509 if linkrevs:
507 lr = linkrevs[0]
510 lr = linkrevs[0]
508 else:
511 else:
509 lr = None
512 lr = None
510
513
511 try:
514 try:
512 fl = repo.file(f)
515 fl = repo.file(f)
513 except error.StorageError as e:
516 except error.StorageError as e:
514 self._err(lr, _(b"broken revlog! (%s)") % e, f)
517 self._err(lr, _(b"broken revlog! (%s)") % e, f)
515 continue
518 continue
516
519
517 for ff in fl.files():
520 for ff in fl.files():
518 try:
521 try:
519 storefiles.remove(ff)
522 storefiles.remove(ff)
520 except KeyError:
523 except KeyError:
521 if self.warnorphanstorefiles:
524 if self.warnorphanstorefiles:
522 self._warn(
525 self._warn(
523 _(b" warning: revlog '%s' not in fncache!") % ff
526 _(b" warning: revlog '%s' not in fncache!") % ff
524 )
527 )
525 self.fncachewarned = True
528 self.fncachewarned = True
526
529
527 if not len(fl) and (self.havecl or self.havemf):
530 if not len(fl) and (self.havecl or self.havemf):
528 self._err(lr, _(b"empty or missing %s") % f)
531 self._err(lr, _(b"empty or missing %s") % f)
529 else:
532 else:
530 # Guard against implementations not setting this.
533 # Guard against implementations not setting this.
531 state[b'skipread'] = set()
534 state[b'skipread'] = set()
532 state[b'safe_renamed'] = set()
535 state[b'safe_renamed'] = set()
533
536
534 for problem in fl.verifyintegrity(state):
537 for problem in fl.verifyintegrity(state):
535 if problem.node is not None:
538 if problem.node is not None:
536 linkrev = fl.linkrev(fl.rev(problem.node))
539 linkrev = fl.linkrev(fl.rev(problem.node))
537 else:
540 else:
538 linkrev = None
541 linkrev = None
539
542
540 if problem.warning:
543 if problem.warning:
541 self._warn(problem.warning)
544 self._warn(problem.warning)
542 elif problem.error:
545 elif problem.error:
543 self._err(
546 self._err(
544 linkrev if linkrev is not None else lr,
547 linkrev if linkrev is not None else lr,
545 problem.error,
548 problem.error,
546 f,
549 f,
547 )
550 )
548 else:
551 else:
549 raise error.ProgrammingError(
552 raise error.ProgrammingError(
550 b'problem instance does not set warning or error '
553 b'problem instance does not set warning or error '
551 b'attribute: %s' % problem.msg
554 b'attribute: %s' % problem.msg
552 )
555 )
553
556
554 seen = {}
557 seen = {}
555 for i in fl:
558 for i in fl:
556 revisions += 1
559 revisions += 1
557 n = fl.node(i)
560 n = fl.node(i)
558 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
561 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
559 if f in filenodes:
562 if f in filenodes:
560 if havemf and n not in filenodes[f]:
563 if havemf and n not in filenodes[f]:
561 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
564 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
562 else:
565 else:
563 del filenodes[f][n]
566 del filenodes[f][n]
564
567
565 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
568 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
566 continue
569 continue
567
570
568 # check renames
571 # check renames
569 try:
572 try:
570 # This requires resolving fulltext (at least on revlogs,
573 # This requires resolving fulltext (at least on revlogs,
571 # though not with LFS revisions). We may want
574 # though not with LFS revisions). We may want
572 # ``verifyintegrity()`` to pass a set of nodes with
575 # ``verifyintegrity()`` to pass a set of nodes with
573 # rename metadata as an optimization.
576 # rename metadata as an optimization.
574 rp = fl.renamed(n)
577 rp = fl.renamed(n)
575 if rp:
578 if rp:
576 if lr is not None and ui.verbose:
579 if lr is not None and ui.verbose:
577 ctx = lrugetctx(lr)
580 ctx = lrugetctx(lr)
578 if not any(rp[0] in pctx for pctx in ctx.parents()):
581 if not any(rp[0] in pctx for pctx in ctx.parents()):
579 self._warn(
582 self._warn(
580 _(
583 _(
581 b"warning: copy source of '%s' not"
584 b"warning: copy source of '%s' not"
582 b" in parents of %s"
585 b" in parents of %s"
583 )
586 )
584 % (f, ctx)
587 % (f, ctx)
585 )
588 )
586 fl2 = repo.file(rp[0])
589 fl2 = repo.file(rp[0])
587 if not len(fl2):
590 if not len(fl2):
588 self._err(
591 self._err(
589 lr,
592 lr,
590 _(
593 _(
591 b"empty or missing copy source revlog "
594 b"empty or missing copy source revlog "
592 b"%s:%s"
595 b"%s:%s"
593 )
596 )
594 % (rp[0], short(rp[1])),
597 % (rp[0], short(rp[1])),
595 f,
598 f,
596 )
599 )
597 elif rp[1] == nullid:
600 elif rp[1] == nullid:
598 ui.note(
601 ui.note(
599 _(
602 _(
600 b"warning: %s@%s: copy source"
603 b"warning: %s@%s: copy source"
601 b" revision is nullid %s:%s\n"
604 b" revision is nullid %s:%s\n"
602 )
605 )
603 % (f, lr, rp[0], short(rp[1]))
606 % (f, lr, rp[0], short(rp[1]))
604 )
607 )
605 else:
608 else:
606 fl2.rev(rp[1])
609 fl2.rev(rp[1])
607 except Exception as inst:
610 except Exception as inst:
608 self._exc(
611 self._exc(
609 lr, _(b"checking rename of %s") % short(n), inst, f
612 lr, _(b"checking rename of %s") % short(n), inst, f
610 )
613 )
611
614
612 # cross-check
615 # cross-check
613 if f in filenodes:
616 if f in filenodes:
614 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
617 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
615 for lr, node in sorted(fns):
618 for lr, node in sorted(fns):
616 self._err(
619 self._err(
617 lr,
620 lr,
618 _(b"manifest refers to unknown revision %s")
621 _(b"manifest refers to unknown revision %s")
619 % short(node),
622 % short(node),
620 f,
623 f,
621 )
624 )
622 progress.complete()
625 progress.complete()
623
626
624 if self.warnorphanstorefiles:
627 if self.warnorphanstorefiles:
625 for f in sorted(storefiles):
628 for f in sorted(storefiles):
626 self._warn(_(b"warning: orphan data file '%s'") % f)
629 self._warn(_(b"warning: orphan data file '%s'") % f)
627
630
628 return len(files), revisions
631 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now