verify: use some intermediate variables instead of a multi-liner...
Author: marmoute | changeset r48146:fde1df74 | branch: default
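The change itself is small: when _checkentry() finds an unknown second parent, the error message used to be passed to self._err() as a call spread over five lines; it is now built in an intermediate msg variable first, matching the pattern already used for the first parent a few lines above. A minimal before/after sketch of the hunk below:

    # before: one reporting call spread over five lines
    self._err(
        lr,
        _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
        f,
    )

    # after: an intermediate variable keeps the reporting call on one line
    msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
    self._err(lr, msg, f)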
@@ -1,622 +1,619 @@
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
41 class verifier(object):
41 class verifier(object):
42 def __init__(self, repo, level=None):
42 def __init__(self, repo, level=None):
43 self.repo = repo.unfiltered()
43 self.repo = repo.unfiltered()
44 self.ui = repo.ui
44 self.ui = repo.ui
45 self.match = repo.narrowmatch()
45 self.match = repo.narrowmatch()
46 if level is None:
46 if level is None:
47 level = VERIFY_DEFAULT
47 level = VERIFY_DEFAULT
48 self._level = level
48 self._level = level
49 self.badrevs = set()
49 self.badrevs = set()
50 self.errors = 0
50 self.errors = 0
51 self.warnings = 0
51 self.warnings = 0
52 self.havecl = len(repo.changelog) > 0
52 self.havecl = len(repo.changelog) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
56 self.refersmf = False
56 self.refersmf = False
57 self.fncachewarned = False
57 self.fncachewarned = False
58 # developer config: verify.skipflags
58 # developer config: verify.skipflags
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
60 self.warnorphanstorefiles = True
60 self.warnorphanstorefiles = True
61
61
62 def _warn(self, msg):
62 def _warn(self, msg):
63 """record a "warning" level issue"""
63 """record a "warning" level issue"""
64 self.ui.warn(msg + b"\n")
64 self.ui.warn(msg + b"\n")
65 self.warnings += 1
65 self.warnings += 1
66
66
67 def _err(self, linkrev, msg, filename=None):
67 def _err(self, linkrev, msg, filename=None):
68 """record a "error" level issue"""
68 """record a "error" level issue"""
69 if linkrev is not None:
69 if linkrev is not None:
70 self.badrevs.add(linkrev)
70 self.badrevs.add(linkrev)
71 linkrev = b"%d" % linkrev
71 linkrev = b"%d" % linkrev
72 else:
72 else:
73 linkrev = b'?'
73 linkrev = b'?'
74 msg = b"%s: %s" % (linkrev, msg)
74 msg = b"%s: %s" % (linkrev, msg)
75 if filename:
75 if filename:
76 msg = b"%s@%s" % (filename, msg)
76 msg = b"%s@%s" % (filename, msg)
77 self.ui.warn(b" " + msg + b"\n")
77 self.ui.warn(b" " + msg + b"\n")
78 self.errors += 1
78 self.errors += 1
79
79
80 def _exc(self, linkrev, msg, inst, filename=None):
80 def _exc(self, linkrev, msg, inst, filename=None):
81 """record exception raised during the verify process"""
81 """record exception raised during the verify process"""
82 fmsg = stringutil.forcebytestr(inst)
82 fmsg = stringutil.forcebytestr(inst)
83 if not fmsg:
83 if not fmsg:
84 fmsg = pycompat.byterepr(inst)
84 fmsg = pycompat.byterepr(inst)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
86
86
87 def _checkrevlog(self, obj, name, linkrev):
87 def _checkrevlog(self, obj, name, linkrev):
88 """verify high level property of a revlog
88 """verify high level property of a revlog
89
89
90 - revlog is present,
90 - revlog is present,
91 - revlog is non-empty,
91 - revlog is non-empty,
92 - sizes (index and data) are correct,
92 - sizes (index and data) are correct,
93 - revlog's format version is correct.
93 - revlog's format version is correct.
94 """
94 """
95 if not len(obj) and (self.havecl or self.havemf):
95 if not len(obj) and (self.havecl or self.havemf):
96 self._err(linkrev, _(b"empty or missing %s") % name)
96 self._err(linkrev, _(b"empty or missing %s") % name)
97 return
97 return
98
98
99 d = obj.checksize()
99 d = obj.checksize()
100 if d[0]:
100 if d[0]:
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
102 if d[1]:
102 if d[1]:
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
104
104
105 if obj._format_version != revlog.REVLOGV0:
105 if obj._format_version != revlog.REVLOGV0:
106 if not self.revlogv1:
106 if not self.revlogv1:
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
108 elif self.revlogv1:
108 elif self.revlogv1:
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
110
110
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
112 """verify a single revlog entry
112 """verify a single revlog entry
113
113
114 arguments are:
114 arguments are:
115 - obj: the source revlog
115 - obj: the source revlog
116 - i: the revision number
116 - i: the revision number
117 - node: the revision node id
117 - node: the revision node id
118 - seen: nodes previously seen for this revlog
118 - seen: nodes previously seen for this revlog
119 - linkrevs: [changelog-revisions] introducing "node"
119 - linkrevs: [changelog-revisions] introducing "node"
120 - f: string label ("changelog", "manifest", or filename)
120 - f: string label ("changelog", "manifest", or filename)
121
121
122 Performs the following checks:
122 Performs the following checks:
123 - linkrev points to an existing changelog revision,
123 - linkrev points to an existing changelog revision,
124 - linkrev points to a changelog revision that introduces this revision,
124 - linkrev points to a changelog revision that introduces this revision,
125 - linkrev points to the lowest of these changesets,
125 - linkrev points to the lowest of these changesets,
126 - both parents exist in the revlog,
126 - both parents exist in the revlog,
127 - the revision is not duplicated.
127 - the revision is not duplicated.
128
128
129 Return the linkrev of the revision (or None for changelog's revisions).
129 Return the linkrev of the revision (or None for changelog's revisions).
130 """
130 """
131 lr = obj.linkrev(obj.rev(node))
131 lr = obj.linkrev(obj.rev(node))
132 if lr < 0 or (self.havecl and lr not in linkrevs):
132 if lr < 0 or (self.havecl and lr not in linkrevs):
133 if lr < 0 or lr >= len(self.repo.changelog):
133 if lr < 0 or lr >= len(self.repo.changelog):
134 msg = _(b"rev %d points to nonexistent changeset %d")
134 msg = _(b"rev %d points to nonexistent changeset %d")
135 else:
135 else:
136 msg = _(b"rev %d points to unexpected changeset %d")
136 msg = _(b"rev %d points to unexpected changeset %d")
137 self._err(None, msg % (i, lr), f)
137 self._err(None, msg % (i, lr), f)
138 if linkrevs:
138 if linkrevs:
139 if f and len(linkrevs) > 1:
139 if f and len(linkrevs) > 1:
140 try:
140 try:
141 # attempt to filter down to real linkrevs
141 # attempt to filter down to real linkrevs
142 linkrevs = []
142 linkrevs = []
143 for lr in linkrevs:
143 for lr in linkrevs:
144 if self.lrugetctx(lr)[f].filenode() == node:
144 if self.lrugetctx(lr)[f].filenode() == node:
145 linkrevs.append(lr)
145 linkrevs.append(lr)
146 except Exception:
146 except Exception:
147 pass
147 pass
148 msg = _(b" (expected %s)")
148 msg = _(b" (expected %s)")
149 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
149 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
150 self._warn(msg)
150 self._warn(msg)
151 lr = None # can't be trusted
151 lr = None # can't be trusted
152
152
153 try:
153 try:
154 p1, p2 = obj.parents(node)
154 p1, p2 = obj.parents(node)
155 if p1 not in seen and p1 != self.repo.nullid:
155 if p1 not in seen and p1 != self.repo.nullid:
156 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
156 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
157 self._err(lr, msg, f)
157 self._err(lr, msg, f)
158 if p2 not in seen and p2 != self.repo.nullid:
158 if p2 not in seen and p2 != self.repo.nullid:
159 - self._err(
160 -     lr,
161 -     _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
162 -     f,
163 - )
159 + msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
160 + self._err(lr, msg, f)
164 except Exception as inst:
161 except Exception as inst:
165 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
162 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
166
163
167 if node in seen:
164 if node in seen:
168 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
165 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
169 seen[node] = i
166 seen[node] = i
170 return lr
167 return lr
171
168
172 def verify(self):
169 def verify(self):
173 """verify the content of the Mercurial repository
170 """verify the content of the Mercurial repository
174
171
175 This method run all verifications, displaying issues as they are found.
172 This method run all verifications, displaying issues as they are found.
176
173
177 return 1 if any error have been encountered, 0 otherwise."""
174 return 1 if any error have been encountered, 0 otherwise."""
178 # initial validation and generic report
175 # initial validation and generic report
179 repo = self.repo
176 repo = self.repo
180 ui = repo.ui
177 ui = repo.ui
181 if not repo.url().startswith(b'file:'):
178 if not repo.url().startswith(b'file:'):
182 raise error.Abort(_(b"cannot verify bundle or remote repos"))
179 raise error.Abort(_(b"cannot verify bundle or remote repos"))
183
180
184 if os.path.exists(repo.sjoin(b"journal")):
181 if os.path.exists(repo.sjoin(b"journal")):
185 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
182 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
186
183
187 if ui.verbose or not self.revlogv1:
184 if ui.verbose or not self.revlogv1:
188 ui.status(
185 ui.status(
189 _(b"repository uses revlog format %d\n")
186 _(b"repository uses revlog format %d\n")
190 % (self.revlogv1 and 1 or 0)
187 % (self.revlogv1 and 1 or 0)
191 )
188 )
192
189
193 # data verification
190 # data verification
194 mflinkrevs, filelinkrevs = self._verifychangelog()
191 mflinkrevs, filelinkrevs = self._verifychangelog()
195 filenodes = self._verifymanifest(mflinkrevs)
192 filenodes = self._verifymanifest(mflinkrevs)
196 del mflinkrevs
193 del mflinkrevs
197 self._crosscheckfiles(filelinkrevs, filenodes)
194 self._crosscheckfiles(filelinkrevs, filenodes)
198 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
195 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
199
196
200 # final report
197 # final report
201 ui.status(
198 ui.status(
202 _(b"checked %d changesets with %d changes to %d files\n")
199 _(b"checked %d changesets with %d changes to %d files\n")
203 % (len(repo.changelog), filerevisions, totalfiles)
200 % (len(repo.changelog), filerevisions, totalfiles)
204 )
201 )
205 if self.warnings:
202 if self.warnings:
206 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
203 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
207 if self.fncachewarned:
204 if self.fncachewarned:
208 ui.warn(
205 ui.warn(
209 _(
206 _(
210 b'hint: run "hg debugrebuildfncache" to recover from '
207 b'hint: run "hg debugrebuildfncache" to recover from '
211 b'corrupt fncache\n'
208 b'corrupt fncache\n'
212 )
209 )
213 )
210 )
214 if self.errors:
211 if self.errors:
215 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
212 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
216 if self.badrevs:
213 if self.badrevs:
217 ui.warn(
214 ui.warn(
218 _(b"(first damaged changeset appears to be %d)\n")
215 _(b"(first damaged changeset appears to be %d)\n")
219 % min(self.badrevs)
216 % min(self.badrevs)
220 )
217 )
221 return 1
218 return 1
222 return 0
219 return 0
223
220
224 def _verifychangelog(self):
221 def _verifychangelog(self):
225 """verify the changelog of a repository
222 """verify the changelog of a repository
226
223
227 The following checks are performed:
224 The following checks are performed:
228 - all of `_checkrevlog` checks,
225 - all of `_checkrevlog` checks,
229 - all of `_checkentry` checks (for each revisions),
226 - all of `_checkentry` checks (for each revisions),
230 - each revision can be read.
227 - each revision can be read.
231
228
232 The function returns some of the data observed in the changesets as a
229 The function returns some of the data observed in the changesets as a
233 (mflinkrevs, filelinkrevs) tuples:
230 (mflinkrevs, filelinkrevs) tuples:
234 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
231 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
235 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
232 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
236
233
237 If a matcher was specified, filelinkrevs will only contains matched
234 If a matcher was specified, filelinkrevs will only contains matched
238 files.
235 files.
239 """
236 """
240 ui = self.ui
237 ui = self.ui
241 repo = self.repo
238 repo = self.repo
242 match = self.match
239 match = self.match
243 cl = repo.changelog
240 cl = repo.changelog
244
241
245 ui.status(_(b"checking changesets\n"))
242 ui.status(_(b"checking changesets\n"))
246 mflinkrevs = {}
243 mflinkrevs = {}
247 filelinkrevs = {}
244 filelinkrevs = {}
248 seen = {}
245 seen = {}
249 self._checkrevlog(cl, b"changelog", 0)
246 self._checkrevlog(cl, b"changelog", 0)
250 progress = ui.makeprogress(
247 progress = ui.makeprogress(
251 _(b'checking'), unit=_(b'changesets'), total=len(repo)
248 _(b'checking'), unit=_(b'changesets'), total=len(repo)
252 )
249 )
253 for i in repo:
250 for i in repo:
254 progress.update(i)
251 progress.update(i)
255 n = cl.node(i)
252 n = cl.node(i)
256 self._checkentry(cl, i, n, seen, [i], b"changelog")
253 self._checkentry(cl, i, n, seen, [i], b"changelog")
257
254
258 try:
255 try:
259 changes = cl.read(n)
256 changes = cl.read(n)
260 if changes[0] != self.repo.nullid:
257 if changes[0] != self.repo.nullid:
261 mflinkrevs.setdefault(changes[0], []).append(i)
258 mflinkrevs.setdefault(changes[0], []).append(i)
262 self.refersmf = True
259 self.refersmf = True
263 for f in changes[3]:
260 for f in changes[3]:
264 if match(f):
261 if match(f):
265 filelinkrevs.setdefault(_normpath(f), []).append(i)
262 filelinkrevs.setdefault(_normpath(f), []).append(i)
266 except Exception as inst:
263 except Exception as inst:
267 self.refersmf = True
264 self.refersmf = True
268 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
265 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
269 progress.complete()
266 progress.complete()
270 return mflinkrevs, filelinkrevs
267 return mflinkrevs, filelinkrevs
271
268
272 def _verifymanifest(
269 def _verifymanifest(
273 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
270 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
274 ):
271 ):
275 """verify the manifestlog content
272 """verify the manifestlog content
276
273
277 Inputs:
274 Inputs:
278 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
275 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
279 - dir: a subdirectory to check (for tree manifest repo)
276 - dir: a subdirectory to check (for tree manifest repo)
280 - storefiles: set of currently "orphan" files.
277 - storefiles: set of currently "orphan" files.
281 - subdirprogress: a progress object
278 - subdirprogress: a progress object
282
279
283 This function checks:
280 This function checks:
284 * all of `_checkrevlog` checks (for all manifest related revlogs)
281 * all of `_checkrevlog` checks (for all manifest related revlogs)
285 * all of `_checkentry` checks (for all manifest related revisions)
282 * all of `_checkentry` checks (for all manifest related revisions)
286 * nodes for subdirectory exists in the sub-directory manifest
283 * nodes for subdirectory exists in the sub-directory manifest
287 * each manifest entries have a file path
284 * each manifest entries have a file path
288 * each manifest node refered in mflinkrevs exist in the manifest log
285 * each manifest node refered in mflinkrevs exist in the manifest log
289
286
290 If tree manifest is in use and a matchers is specified, only the
287 If tree manifest is in use and a matchers is specified, only the
291 sub-directories matching it will be verified.
288 sub-directories matching it will be verified.
292
289
293 return a two level mapping:
290 return a two level mapping:
294 {"path" -> { filenode -> changelog-revision}}
291 {"path" -> { filenode -> changelog-revision}}
295
292
296 This mapping primarily contains entries for every files in the
293 This mapping primarily contains entries for every files in the
297 repository. In addition, when tree-manifest is used, it also contains
294 repository. In addition, when tree-manifest is used, it also contains
298 sub-directory entries.
295 sub-directory entries.
299
296
300 If a matcher is provided, only matching paths will be included.
297 If a matcher is provided, only matching paths will be included.
301 """
298 """
302 repo = self.repo
299 repo = self.repo
303 ui = self.ui
300 ui = self.ui
304 match = self.match
301 match = self.match
305 mfl = self.repo.manifestlog
302 mfl = self.repo.manifestlog
306 mf = mfl.getstorage(dir)
303 mf = mfl.getstorage(dir)
307
304
308 if not dir:
305 if not dir:
309 self.ui.status(_(b"checking manifests\n"))
306 self.ui.status(_(b"checking manifests\n"))
310
307
311 filenodes = {}
308 filenodes = {}
312 subdirnodes = {}
309 subdirnodes = {}
313 seen = {}
310 seen = {}
314 label = b"manifest"
311 label = b"manifest"
315 if dir:
312 if dir:
316 label = dir
313 label = dir
317 revlogfiles = mf.files()
314 revlogfiles = mf.files()
318 storefiles.difference_update(revlogfiles)
315 storefiles.difference_update(revlogfiles)
319 if subdirprogress: # should be true since we're in a subdirectory
316 if subdirprogress: # should be true since we're in a subdirectory
320 subdirprogress.increment()
317 subdirprogress.increment()
321 if self.refersmf:
318 if self.refersmf:
322 # Do not check manifest if there are only changelog entries with
319 # Do not check manifest if there are only changelog entries with
323 # null manifests.
320 # null manifests.
324 self._checkrevlog(mf._revlog, label, 0)
321 self._checkrevlog(mf._revlog, label, 0)
325 progress = ui.makeprogress(
322 progress = ui.makeprogress(
326 _(b'checking'), unit=_(b'manifests'), total=len(mf)
323 _(b'checking'), unit=_(b'manifests'), total=len(mf)
327 )
324 )
328 for i in mf:
325 for i in mf:
329 if not dir:
326 if not dir:
330 progress.update(i)
327 progress.update(i)
331 n = mf.node(i)
328 n = mf.node(i)
332 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
329 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
333 if n in mflinkrevs:
330 if n in mflinkrevs:
334 del mflinkrevs[n]
331 del mflinkrevs[n]
335 elif dir:
332 elif dir:
336 self._err(
333 self._err(
337 lr,
334 lr,
338 _(b"%s not in parent-directory manifest") % short(n),
335 _(b"%s not in parent-directory manifest") % short(n),
339 label,
336 label,
340 )
337 )
341 else:
338 else:
342 self._err(lr, _(b"%s not in changesets") % short(n), label)
339 self._err(lr, _(b"%s not in changesets") % short(n), label)
343
340
344 try:
341 try:
345 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
342 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
346 for f, fn, fl in mfdelta.iterentries():
343 for f, fn, fl in mfdelta.iterentries():
347 if not f:
344 if not f:
348 self._err(lr, _(b"entry without name in manifest"))
345 self._err(lr, _(b"entry without name in manifest"))
349 elif f == b"/dev/null": # ignore this in very old repos
346 elif f == b"/dev/null": # ignore this in very old repos
350 continue
347 continue
351 fullpath = dir + _normpath(f)
348 fullpath = dir + _normpath(f)
352 if fl == b't':
349 if fl == b't':
353 if not match.visitdir(fullpath):
350 if not match.visitdir(fullpath):
354 continue
351 continue
355 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
352 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
356 fn, []
353 fn, []
357 ).append(lr)
354 ).append(lr)
358 else:
355 else:
359 if not match(fullpath):
356 if not match(fullpath):
360 continue
357 continue
361 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
358 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
362 except Exception as inst:
359 except Exception as inst:
363 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
360 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
364 if self._level >= VERIFY_FULL:
361 if self._level >= VERIFY_FULL:
365 try:
362 try:
366 # Various issues can affect manifest. So we read each full
363 # Various issues can affect manifest. So we read each full
367 # text from storage. This triggers the checks from the core
364 # text from storage. This triggers the checks from the core
368 # code (eg: hash verification, filename are ordered, etc.)
365 # code (eg: hash verification, filename are ordered, etc.)
369 mfdelta = mfl.get(dir, n).read()
366 mfdelta = mfl.get(dir, n).read()
370 except Exception as inst:
367 except Exception as inst:
371 self._exc(
368 self._exc(
372 lr,
369 lr,
373 _(b"reading full manifest %s") % short(n),
370 _(b"reading full manifest %s") % short(n),
374 inst,
371 inst,
375 label,
372 label,
376 )
373 )
377
374
378 if not dir:
375 if not dir:
379 progress.complete()
376 progress.complete()
380
377
381 if self.havemf:
378 if self.havemf:
382 # since we delete entry in `mflinkrevs` during iteration, any
379 # since we delete entry in `mflinkrevs` during iteration, any
383 # remaining entries are "missing". We need to issue errors for them.
380 # remaining entries are "missing". We need to issue errors for them.
384 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
381 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
385 for c, m in sorted(changesetpairs):
382 for c, m in sorted(changesetpairs):
386 if dir:
383 if dir:
387 self._err(
384 self._err(
388 c,
385 c,
389 _(
386 _(
390 b"parent-directory manifest refers to unknown"
387 b"parent-directory manifest refers to unknown"
391 b" revision %s"
388 b" revision %s"
392 )
389 )
393 % short(m),
390 % short(m),
394 label,
391 label,
395 )
392 )
396 else:
393 else:
397 self._err(
394 self._err(
398 c,
395 c,
399 _(b"changeset refers to unknown revision %s")
396 _(b"changeset refers to unknown revision %s")
400 % short(m),
397 % short(m),
401 label,
398 label,
402 )
399 )
403
400
404 if not dir and subdirnodes:
401 if not dir and subdirnodes:
405 self.ui.status(_(b"checking directory manifests\n"))
402 self.ui.status(_(b"checking directory manifests\n"))
406 storefiles = set()
403 storefiles = set()
407 subdirs = set()
404 subdirs = set()
408 revlogv1 = self.revlogv1
405 revlogv1 = self.revlogv1
409 for t, f, f2, size in repo.store.datafiles():
406 for t, f, f2, size in repo.store.datafiles():
410 if not f:
407 if not f:
411 self._err(None, _(b"cannot decode filename '%s'") % f2)
408 self._err(None, _(b"cannot decode filename '%s'") % f2)
412 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
409 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
413 storefiles.add(_normpath(f))
410 storefiles.add(_normpath(f))
414 subdirs.add(os.path.dirname(f))
411 subdirs.add(os.path.dirname(f))
415 subdirprogress = ui.makeprogress(
412 subdirprogress = ui.makeprogress(
416 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
413 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
417 )
414 )
418
415
419 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
416 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
420 subdirfilenodes = self._verifymanifest(
417 subdirfilenodes = self._verifymanifest(
421 linkrevs, subdir, storefiles, subdirprogress
418 linkrevs, subdir, storefiles, subdirprogress
422 )
419 )
423 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
420 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
424 filenodes.setdefault(f, {}).update(onefilenodes)
421 filenodes.setdefault(f, {}).update(onefilenodes)
425
422
426 if not dir and subdirnodes:
423 if not dir and subdirnodes:
427 assert subdirprogress is not None # help pytype
424 assert subdirprogress is not None # help pytype
428 subdirprogress.complete()
425 subdirprogress.complete()
429 if self.warnorphanstorefiles:
426 if self.warnorphanstorefiles:
430 for f in sorted(storefiles):
427 for f in sorted(storefiles):
431 self._warn(_(b"warning: orphan data file '%s'") % f)
428 self._warn(_(b"warning: orphan data file '%s'") % f)
432
429
433 return filenodes
430 return filenodes
434
431
435 def _crosscheckfiles(self, filelinkrevs, filenodes):
432 def _crosscheckfiles(self, filelinkrevs, filenodes):
436 repo = self.repo
433 repo = self.repo
437 ui = self.ui
434 ui = self.ui
438 ui.status(_(b"crosschecking files in changesets and manifests\n"))
435 ui.status(_(b"crosschecking files in changesets and manifests\n"))
439
436
440 total = len(filelinkrevs) + len(filenodes)
437 total = len(filelinkrevs) + len(filenodes)
441 progress = ui.makeprogress(
438 progress = ui.makeprogress(
442 _(b'crosschecking'), unit=_(b'files'), total=total
439 _(b'crosschecking'), unit=_(b'files'), total=total
443 )
440 )
444 if self.havemf:
441 if self.havemf:
445 for f in sorted(filelinkrevs):
442 for f in sorted(filelinkrevs):
446 progress.increment()
443 progress.increment()
447 if f not in filenodes:
444 if f not in filenodes:
448 lr = filelinkrevs[f][0]
445 lr = filelinkrevs[f][0]
449 self._err(lr, _(b"in changeset but not in manifest"), f)
446 self._err(lr, _(b"in changeset but not in manifest"), f)
450
447
451 if self.havecl:
448 if self.havecl:
452 for f in sorted(filenodes):
449 for f in sorted(filenodes):
453 progress.increment()
450 progress.increment()
454 if f not in filelinkrevs:
451 if f not in filelinkrevs:
455 try:
452 try:
456 fl = repo.file(f)
453 fl = repo.file(f)
457 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
454 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
458 except Exception:
455 except Exception:
459 lr = None
456 lr = None
460 self._err(lr, _(b"in manifest but not in changeset"), f)
457 self._err(lr, _(b"in manifest but not in changeset"), f)
461
458
462 progress.complete()
459 progress.complete()
463
460
464 def _verifyfiles(self, filenodes, filelinkrevs):
461 def _verifyfiles(self, filenodes, filelinkrevs):
465 repo = self.repo
462 repo = self.repo
466 ui = self.ui
463 ui = self.ui
467 lrugetctx = self.lrugetctx
464 lrugetctx = self.lrugetctx
468 revlogv1 = self.revlogv1
465 revlogv1 = self.revlogv1
469 havemf = self.havemf
466 havemf = self.havemf
470 ui.status(_(b"checking files\n"))
467 ui.status(_(b"checking files\n"))
471
468
472 storefiles = set()
469 storefiles = set()
473 for rl_type, f, f2, size in repo.store.datafiles():
470 for rl_type, f, f2, size in repo.store.datafiles():
474 if not f:
471 if not f:
475 self._err(None, _(b"cannot decode filename '%s'") % f2)
472 self._err(None, _(b"cannot decode filename '%s'") % f2)
476 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
473 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
477 storefiles.add(_normpath(f))
474 storefiles.add(_normpath(f))
478
475
479 state = {
476 state = {
480 # TODO this assumes revlog storage for changelog.
477 # TODO this assumes revlog storage for changelog.
481 b'expectedversion': self.repo.changelog._format_version,
478 b'expectedversion': self.repo.changelog._format_version,
482 b'skipflags': self.skipflags,
479 b'skipflags': self.skipflags,
483 # experimental config: censor.policy
480 # experimental config: censor.policy
484 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
481 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
485 }
482 }
486
483
487 files = sorted(set(filenodes) | set(filelinkrevs))
484 files = sorted(set(filenodes) | set(filelinkrevs))
488 revisions = 0
485 revisions = 0
489 progress = ui.makeprogress(
486 progress = ui.makeprogress(
490 _(b'checking'), unit=_(b'files'), total=len(files)
487 _(b'checking'), unit=_(b'files'), total=len(files)
491 )
488 )
492 for i, f in enumerate(files):
489 for i, f in enumerate(files):
493 progress.update(i, item=f)
490 progress.update(i, item=f)
494 try:
491 try:
495 linkrevs = filelinkrevs[f]
492 linkrevs = filelinkrevs[f]
496 except KeyError:
493 except KeyError:
497 # in manifest but not in changelog
494 # in manifest but not in changelog
498 linkrevs = []
495 linkrevs = []
499
496
500 if linkrevs:
497 if linkrevs:
501 lr = linkrevs[0]
498 lr = linkrevs[0]
502 else:
499 else:
503 lr = None
500 lr = None
504
501
505 try:
502 try:
506 fl = repo.file(f)
503 fl = repo.file(f)
507 except error.StorageError as e:
504 except error.StorageError as e:
508 self._err(lr, _(b"broken revlog! (%s)") % e, f)
505 self._err(lr, _(b"broken revlog! (%s)") % e, f)
509 continue
506 continue
510
507
511 for ff in fl.files():
508 for ff in fl.files():
512 try:
509 try:
513 storefiles.remove(ff)
510 storefiles.remove(ff)
514 except KeyError:
511 except KeyError:
515 if self.warnorphanstorefiles:
512 if self.warnorphanstorefiles:
516 self._warn(
513 self._warn(
517 _(b" warning: revlog '%s' not in fncache!") % ff
514 _(b" warning: revlog '%s' not in fncache!") % ff
518 )
515 )
519 self.fncachewarned = True
516 self.fncachewarned = True
520
517
521 if not len(fl) and (self.havecl or self.havemf):
518 if not len(fl) and (self.havecl or self.havemf):
522 self._err(lr, _(b"empty or missing %s") % f)
519 self._err(lr, _(b"empty or missing %s") % f)
523 else:
520 else:
524 # Guard against implementations not setting this.
521 # Guard against implementations not setting this.
525 state[b'skipread'] = set()
522 state[b'skipread'] = set()
526 state[b'safe_renamed'] = set()
523 state[b'safe_renamed'] = set()
527
524
528 for problem in fl.verifyintegrity(state):
525 for problem in fl.verifyintegrity(state):
529 if problem.node is not None:
526 if problem.node is not None:
530 linkrev = fl.linkrev(fl.rev(problem.node))
527 linkrev = fl.linkrev(fl.rev(problem.node))
531 else:
528 else:
532 linkrev = None
529 linkrev = None
533
530
534 if problem.warning:
531 if problem.warning:
535 self._warn(problem.warning)
532 self._warn(problem.warning)
536 elif problem.error:
533 elif problem.error:
537 self._err(
534 self._err(
538 linkrev if linkrev is not None else lr,
535 linkrev if linkrev is not None else lr,
539 problem.error,
536 problem.error,
540 f,
537 f,
541 )
538 )
542 else:
539 else:
543 raise error.ProgrammingError(
540 raise error.ProgrammingError(
544 b'problem instance does not set warning or error '
541 b'problem instance does not set warning or error '
545 b'attribute: %s' % problem.msg
542 b'attribute: %s' % problem.msg
546 )
543 )
547
544
548 seen = {}
545 seen = {}
549 for i in fl:
546 for i in fl:
550 revisions += 1
547 revisions += 1
551 n = fl.node(i)
548 n = fl.node(i)
552 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
549 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
553 if f in filenodes:
550 if f in filenodes:
554 if havemf and n not in filenodes[f]:
551 if havemf and n not in filenodes[f]:
555 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
552 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
556 else:
553 else:
557 del filenodes[f][n]
554 del filenodes[f][n]
558
555
559 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
556 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
560 continue
557 continue
561
558
562 # check renames
559 # check renames
563 try:
560 try:
564 # This requires resolving fulltext (at least on revlogs,
561 # This requires resolving fulltext (at least on revlogs,
565 # though not with LFS revisions). We may want
562 # though not with LFS revisions). We may want
566 # ``verifyintegrity()`` to pass a set of nodes with
563 # ``verifyintegrity()`` to pass a set of nodes with
567 # rename metadata as an optimization.
564 # rename metadata as an optimization.
568 rp = fl.renamed(n)
565 rp = fl.renamed(n)
569 if rp:
566 if rp:
570 if lr is not None and ui.verbose:
567 if lr is not None and ui.verbose:
571 ctx = lrugetctx(lr)
568 ctx = lrugetctx(lr)
572 if not any(rp[0] in pctx for pctx in ctx.parents()):
569 if not any(rp[0] in pctx for pctx in ctx.parents()):
573 self._warn(
570 self._warn(
574 _(
571 _(
575 b"warning: copy source of '%s' not"
572 b"warning: copy source of '%s' not"
576 b" in parents of %s"
573 b" in parents of %s"
577 )
574 )
578 % (f, ctx)
575 % (f, ctx)
579 )
576 )
580 fl2 = repo.file(rp[0])
577 fl2 = repo.file(rp[0])
581 if not len(fl2):
578 if not len(fl2):
582 self._err(
579 self._err(
583 lr,
580 lr,
584 _(
581 _(
585 b"empty or missing copy source revlog "
582 b"empty or missing copy source revlog "
586 b"%s:%s"
583 b"%s:%s"
587 )
584 )
588 % (rp[0], short(rp[1])),
585 % (rp[0], short(rp[1])),
589 f,
586 f,
590 )
587 )
591 elif rp[1] == self.repo.nullid:
588 elif rp[1] == self.repo.nullid:
592 ui.note(
589 ui.note(
593 _(
590 _(
594 b"warning: %s@%s: copy source"
591 b"warning: %s@%s: copy source"
595 b" revision is nullid %s:%s\n"
592 b" revision is nullid %s:%s\n"
596 )
593 )
597 % (f, lr, rp[0], short(rp[1]))
594 % (f, lr, rp[0], short(rp[1]))
598 )
595 )
599 else:
596 else:
600 fl2.rev(rp[1])
597 fl2.rev(rp[1])
601 except Exception as inst:
598 except Exception as inst:
602 self._exc(
599 self._exc(
603 lr, _(b"checking rename of %s") % short(n), inst, f
600 lr, _(b"checking rename of %s") % short(n), inst, f
604 )
601 )
605
602
606 # cross-check
603 # cross-check
607 if f in filenodes:
604 if f in filenodes:
608 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
605 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
609 for lr, node in sorted(fns):
606 for lr, node in sorted(fns):
610 self._err(
607 self._err(
611 lr,
608 lr,
612 _(b"manifest refers to unknown revision %s")
609 _(b"manifest refers to unknown revision %s")
613 % short(node),
610 % short(node),
614 f,
611 f,
615 )
612 )
616 progress.complete()
613 progress.complete()
617
614
618 if self.warnorphanstorefiles:
615 if self.warnorphanstorefiles:
619 for f in sorted(storefiles):
616 for f in sorted(storefiles):
620 self._warn(_(b"warning: orphan data file '%s'") % f)
617 self._warn(_(b"warning: orphan data file '%s'") % f)
621
618
622 return len(files), revisions
619 return len(files), revisions
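
For context, a minimal usage sketch (not part of this changeset) of how the module-level verify() helper defined at the top of the file is driven programmatically; the repository path and ui setup below are illustrative assumptions, and the same checks run behind the hg verify command:

    from mercurial import hg, ui as uimod, verify

    # open a local repository (path is illustrative)
    u = uimod.ui.load()
    repo = hg.repository(u, b'/path/to/repo')

    # run the integrity checks; returns 1 if errors were found, 0 otherwise
    rc = verify.verify(repo)

    # the more expensive checks (e.g. reading every full manifest text)
    # are enabled by passing level=verify.VERIFY_FULL
    rc = verify.verify(repo, level=verify.VERIFY_FULL)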