verify: use some intermediate variables instead of a multi-liner...

Author: marmoute
Changeset: r48147:ba8a9fbe (branch: default)
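The change itself is small: the fncache hint that verify() used to build inline through a nested ui.warn(_( ... )) call is hoisted into a module-level HINT_FNCACHE constant, so the call site becomes a one-liner. Below is a minimal, self-contained sketch of that pattern; the _() stub and the report_fncache_warning() helper are illustrative stand-ins, not Mercurial APIs.

import sys


def _(msg):
    # Identity stand-in for mercurial.i18n._ (the real helper does a gettext lookup).
    return msg


# The hint text now lives in one module-level constant...
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)


def report_fncache_warning():
    # ...so the call site shrinks from a six-line ui.warn(_(...)) block to one line.
    sys.stdout.buffer.write(HINT_FNCACHE)


if __name__ == '__main__':
    report_fncache_warning()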
@@ -1,619 +1,619 @@
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
41 + HINT_FNCACHE = _(
42 +     b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 + )
44 +
45 +
41 class verifier(object):
46 class verifier(object):
42 def __init__(self, repo, level=None):
47 def __init__(self, repo, level=None):
43 self.repo = repo.unfiltered()
48 self.repo = repo.unfiltered()
44 self.ui = repo.ui
49 self.ui = repo.ui
45 self.match = repo.narrowmatch()
50 self.match = repo.narrowmatch()
46 if level is None:
51 if level is None:
47 level = VERIFY_DEFAULT
52 level = VERIFY_DEFAULT
48 self._level = level
53 self._level = level
49 self.badrevs = set()
54 self.badrevs = set()
50 self.errors = 0
55 self.errors = 0
51 self.warnings = 0
56 self.warnings = 0
52 self.havecl = len(repo.changelog) > 0
57 self.havecl = len(repo.changelog) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
58 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
59 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
60 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
56 self.refersmf = False
61 self.refersmf = False
57 self.fncachewarned = False
62 self.fncachewarned = False
58 # developer config: verify.skipflags
63 # developer config: verify.skipflags
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
64 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
60 self.warnorphanstorefiles = True
65 self.warnorphanstorefiles = True
61
66
62 def _warn(self, msg):
67 def _warn(self, msg):
63 """record a "warning" level issue"""
68 """record a "warning" level issue"""
64 self.ui.warn(msg + b"\n")
69 self.ui.warn(msg + b"\n")
65 self.warnings += 1
70 self.warnings += 1
66
71
67 def _err(self, linkrev, msg, filename=None):
72 def _err(self, linkrev, msg, filename=None):
68 """record a "error" level issue"""
73 """record a "error" level issue"""
69 if linkrev is not None:
74 if linkrev is not None:
70 self.badrevs.add(linkrev)
75 self.badrevs.add(linkrev)
71 linkrev = b"%d" % linkrev
76 linkrev = b"%d" % linkrev
72 else:
77 else:
73 linkrev = b'?'
78 linkrev = b'?'
74 msg = b"%s: %s" % (linkrev, msg)
79 msg = b"%s: %s" % (linkrev, msg)
75 if filename:
80 if filename:
76 msg = b"%s@%s" % (filename, msg)
81 msg = b"%s@%s" % (filename, msg)
77 self.ui.warn(b" " + msg + b"\n")
82 self.ui.warn(b" " + msg + b"\n")
78 self.errors += 1
83 self.errors += 1
79
84
80 def _exc(self, linkrev, msg, inst, filename=None):
85 def _exc(self, linkrev, msg, inst, filename=None):
81 """record exception raised during the verify process"""
86 """record exception raised during the verify process"""
82 fmsg = stringutil.forcebytestr(inst)
87 fmsg = stringutil.forcebytestr(inst)
83 if not fmsg:
88 if not fmsg:
84 fmsg = pycompat.byterepr(inst)
89 fmsg = pycompat.byterepr(inst)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
90 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
86
91
87 def _checkrevlog(self, obj, name, linkrev):
92 def _checkrevlog(self, obj, name, linkrev):
88 """verify high level property of a revlog
93 """verify high level property of a revlog
89
94
90 - revlog is present,
95 - revlog is present,
91 - revlog is non-empty,
96 - revlog is non-empty,
92 - sizes (index and data) are correct,
97 - sizes (index and data) are correct,
93 - revlog's format version is correct.
98 - revlog's format version is correct.
94 """
99 """
95 if not len(obj) and (self.havecl or self.havemf):
100 if not len(obj) and (self.havecl or self.havemf):
96 self._err(linkrev, _(b"empty or missing %s") % name)
101 self._err(linkrev, _(b"empty or missing %s") % name)
97 return
102 return
98
103
99 d = obj.checksize()
104 d = obj.checksize()
100 if d[0]:
105 if d[0]:
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
106 self._err(None, _(b"data length off by %d bytes") % d[0], name)
102 if d[1]:
107 if d[1]:
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
108 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
104
109
105 if obj._format_version != revlog.REVLOGV0:
110 if obj._format_version != revlog.REVLOGV0:
106 if not self.revlogv1:
111 if not self.revlogv1:
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
112 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
108 elif self.revlogv1:
113 elif self.revlogv1:
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
114 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
110
115
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
116 def _checkentry(self, obj, i, node, seen, linkrevs, f):
112 """verify a single revlog entry
117 """verify a single revlog entry
113
118
114 arguments are:
119 arguments are:
115 - obj: the source revlog
120 - obj: the source revlog
116 - i: the revision number
121 - i: the revision number
117 - node: the revision node id
122 - node: the revision node id
118 - seen: nodes previously seen for this revlog
123 - seen: nodes previously seen for this revlog
119 - linkrevs: [changelog-revisions] introducing "node"
124 - linkrevs: [changelog-revisions] introducing "node"
120 - f: string label ("changelog", "manifest", or filename)
125 - f: string label ("changelog", "manifest", or filename)
121
126
122 Performs the following checks:
127 Performs the following checks:
123 - linkrev points to an existing changelog revision,
128 - linkrev points to an existing changelog revision,
124 - linkrev points to a changelog revision that introduces this revision,
129 - linkrev points to a changelog revision that introduces this revision,
125 - linkrev points to the lowest of these changesets,
130 - linkrev points to the lowest of these changesets,
126 - both parents exist in the revlog,
131 - both parents exist in the revlog,
127 - the revision is not duplicated.
132 - the revision is not duplicated.
128
133
129 Return the linkrev of the revision (or None for changelog's revisions).
134 Return the linkrev of the revision (or None for changelog's revisions).
130 """
135 """
131 lr = obj.linkrev(obj.rev(node))
136 lr = obj.linkrev(obj.rev(node))
132 if lr < 0 or (self.havecl and lr not in linkrevs):
137 if lr < 0 or (self.havecl and lr not in linkrevs):
133 if lr < 0 or lr >= len(self.repo.changelog):
138 if lr < 0 or lr >= len(self.repo.changelog):
134 msg = _(b"rev %d points to nonexistent changeset %d")
139 msg = _(b"rev %d points to nonexistent changeset %d")
135 else:
140 else:
136 msg = _(b"rev %d points to unexpected changeset %d")
141 msg = _(b"rev %d points to unexpected changeset %d")
137 self._err(None, msg % (i, lr), f)
142 self._err(None, msg % (i, lr), f)
138 if linkrevs:
143 if linkrevs:
139 if f and len(linkrevs) > 1:
144 if f and len(linkrevs) > 1:
140 try:
145 try:
141 # attempt to filter down to real linkrevs
146 # attempt to filter down to real linkrevs
142 linkrevs = []
147 linkrevs = []
143 for lr in linkrevs:
148 for lr in linkrevs:
144 if self.lrugetctx(lr)[f].filenode() == node:
149 if self.lrugetctx(lr)[f].filenode() == node:
145 linkrevs.append(lr)
150 linkrevs.append(lr)
146 except Exception:
151 except Exception:
147 pass
152 pass
148 msg = _(b" (expected %s)")
153 msg = _(b" (expected %s)")
149 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
154 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
150 self._warn(msg)
155 self._warn(msg)
151 lr = None # can't be trusted
156 lr = None # can't be trusted
152
157
153 try:
158 try:
154 p1, p2 = obj.parents(node)
159 p1, p2 = obj.parents(node)
155 if p1 not in seen and p1 != self.repo.nullid:
160 if p1 not in seen and p1 != self.repo.nullid:
156 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
161 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
157 self._err(lr, msg, f)
162 self._err(lr, msg, f)
158 if p2 not in seen and p2 != self.repo.nullid:
163 if p2 not in seen and p2 != self.repo.nullid:
159 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
164 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
160 self._err(lr, msg, f)
165 self._err(lr, msg, f)
161 except Exception as inst:
166 except Exception as inst:
162 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
167 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
163
168
164 if node in seen:
169 if node in seen:
165 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
170 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
166 seen[node] = i
171 seen[node] = i
167 return lr
172 return lr
168
173
169 def verify(self):
174 def verify(self):
170 """verify the content of the Mercurial repository
175 """verify the content of the Mercurial repository
171
176
172 This method run all verifications, displaying issues as they are found.
177 This method run all verifications, displaying issues as they are found.
173
178
174 return 1 if any error have been encountered, 0 otherwise."""
179 return 1 if any error have been encountered, 0 otherwise."""
175 # initial validation and generic report
180 # initial validation and generic report
176 repo = self.repo
181 repo = self.repo
177 ui = repo.ui
182 ui = repo.ui
178 if not repo.url().startswith(b'file:'):
183 if not repo.url().startswith(b'file:'):
179 raise error.Abort(_(b"cannot verify bundle or remote repos"))
184 raise error.Abort(_(b"cannot verify bundle or remote repos"))
180
185
181 if os.path.exists(repo.sjoin(b"journal")):
186 if os.path.exists(repo.sjoin(b"journal")):
182 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
187 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
183
188
184 if ui.verbose or not self.revlogv1:
189 if ui.verbose or not self.revlogv1:
185 ui.status(
190 ui.status(
186 _(b"repository uses revlog format %d\n")
191 _(b"repository uses revlog format %d\n")
187 % (self.revlogv1 and 1 or 0)
192 % (self.revlogv1 and 1 or 0)
188 )
193 )
189
194
190 # data verification
195 # data verification
191 mflinkrevs, filelinkrevs = self._verifychangelog()
196 mflinkrevs, filelinkrevs = self._verifychangelog()
192 filenodes = self._verifymanifest(mflinkrevs)
197 filenodes = self._verifymanifest(mflinkrevs)
193 del mflinkrevs
198 del mflinkrevs
194 self._crosscheckfiles(filelinkrevs, filenodes)
199 self._crosscheckfiles(filelinkrevs, filenodes)
195 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
200 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
196
201
197 # final report
202 # final report
198 ui.status(
203 ui.status(
199 _(b"checked %d changesets with %d changes to %d files\n")
204 _(b"checked %d changesets with %d changes to %d files\n")
200 % (len(repo.changelog), filerevisions, totalfiles)
205 % (len(repo.changelog), filerevisions, totalfiles)
201 )
206 )
202 if self.warnings:
207 if self.warnings:
203 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
208 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
204 if self.fncachewarned:
209 if self.fncachewarned:
205 - ui.warn(
206 -     _(
207 -         b'hint: run "hg debugrebuildfncache" to recover from '
208 -         b'corrupt fncache\n'
209 -     )
210 - )
210 + ui.warn(HINT_FNCACHE)
211 if self.errors:
211 if self.errors:
212 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
212 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
213 if self.badrevs:
213 if self.badrevs:
214 ui.warn(
214 ui.warn(
215 _(b"(first damaged changeset appears to be %d)\n")
215 _(b"(first damaged changeset appears to be %d)\n")
216 % min(self.badrevs)
216 % min(self.badrevs)
217 )
217 )
218 return 1
218 return 1
219 return 0
219 return 0
220
220
221 def _verifychangelog(self):
221 def _verifychangelog(self):
222 """verify the changelog of a repository
222 """verify the changelog of a repository
223
223
224 The following checks are performed:
224 The following checks are performed:
225 - all of `_checkrevlog` checks,
225 - all of `_checkrevlog` checks,
226 - all of `_checkentry` checks (for each revisions),
226 - all of `_checkentry` checks (for each revisions),
227 - each revision can be read.
227 - each revision can be read.
228
228
229 The function returns some of the data observed in the changesets as a
229 The function returns some of the data observed in the changesets as a
230 (mflinkrevs, filelinkrevs) tuples:
230 (mflinkrevs, filelinkrevs) tuples:
231 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
231 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
232 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
232 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
233
233
234 If a matcher was specified, filelinkrevs will only contains matched
234 If a matcher was specified, filelinkrevs will only contains matched
235 files.
235 files.
236 """
236 """
237 ui = self.ui
237 ui = self.ui
238 repo = self.repo
238 repo = self.repo
239 match = self.match
239 match = self.match
240 cl = repo.changelog
240 cl = repo.changelog
241
241
242 ui.status(_(b"checking changesets\n"))
242 ui.status(_(b"checking changesets\n"))
243 mflinkrevs = {}
243 mflinkrevs = {}
244 filelinkrevs = {}
244 filelinkrevs = {}
245 seen = {}
245 seen = {}
246 self._checkrevlog(cl, b"changelog", 0)
246 self._checkrevlog(cl, b"changelog", 0)
247 progress = ui.makeprogress(
247 progress = ui.makeprogress(
248 _(b'checking'), unit=_(b'changesets'), total=len(repo)
248 _(b'checking'), unit=_(b'changesets'), total=len(repo)
249 )
249 )
250 for i in repo:
250 for i in repo:
251 progress.update(i)
251 progress.update(i)
252 n = cl.node(i)
252 n = cl.node(i)
253 self._checkentry(cl, i, n, seen, [i], b"changelog")
253 self._checkentry(cl, i, n, seen, [i], b"changelog")
254
254
255 try:
255 try:
256 changes = cl.read(n)
256 changes = cl.read(n)
257 if changes[0] != self.repo.nullid:
257 if changes[0] != self.repo.nullid:
258 mflinkrevs.setdefault(changes[0], []).append(i)
258 mflinkrevs.setdefault(changes[0], []).append(i)
259 self.refersmf = True
259 self.refersmf = True
260 for f in changes[3]:
260 for f in changes[3]:
261 if match(f):
261 if match(f):
262 filelinkrevs.setdefault(_normpath(f), []).append(i)
262 filelinkrevs.setdefault(_normpath(f), []).append(i)
263 except Exception as inst:
263 except Exception as inst:
264 self.refersmf = True
264 self.refersmf = True
265 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
265 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
266 progress.complete()
266 progress.complete()
267 return mflinkrevs, filelinkrevs
267 return mflinkrevs, filelinkrevs
268
268
269 def _verifymanifest(
269 def _verifymanifest(
270 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
270 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
271 ):
271 ):
272 """verify the manifestlog content
272 """verify the manifestlog content
273
273
274 Inputs:
274 Inputs:
275 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
275 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
276 - dir: a subdirectory to check (for tree manifest repo)
276 - dir: a subdirectory to check (for tree manifest repo)
277 - storefiles: set of currently "orphan" files.
277 - storefiles: set of currently "orphan" files.
278 - subdirprogress: a progress object
278 - subdirprogress: a progress object
279
279
280 This function checks:
280 This function checks:
281 * all of `_checkrevlog` checks (for all manifest related revlogs)
281 * all of `_checkrevlog` checks (for all manifest related revlogs)
282 * all of `_checkentry` checks (for all manifest related revisions)
282 * all of `_checkentry` checks (for all manifest related revisions)
283 * nodes for subdirectory exists in the sub-directory manifest
283 * nodes for subdirectory exists in the sub-directory manifest
284 * each manifest entries have a file path
284 * each manifest entries have a file path
285 * each manifest node refered in mflinkrevs exist in the manifest log
285 * each manifest node refered in mflinkrevs exist in the manifest log
286
286
287 If tree manifest is in use and a matchers is specified, only the
287 If tree manifest is in use and a matchers is specified, only the
288 sub-directories matching it will be verified.
288 sub-directories matching it will be verified.
289
289
290 return a two level mapping:
290 return a two level mapping:
291 {"path" -> { filenode -> changelog-revision}}
291 {"path" -> { filenode -> changelog-revision}}
292
292
293 This mapping primarily contains entries for every files in the
293 This mapping primarily contains entries for every files in the
294 repository. In addition, when tree-manifest is used, it also contains
294 repository. In addition, when tree-manifest is used, it also contains
295 sub-directory entries.
295 sub-directory entries.
296
296
297 If a matcher is provided, only matching paths will be included.
297 If a matcher is provided, only matching paths will be included.
298 """
298 """
299 repo = self.repo
299 repo = self.repo
300 ui = self.ui
300 ui = self.ui
301 match = self.match
301 match = self.match
302 mfl = self.repo.manifestlog
302 mfl = self.repo.manifestlog
303 mf = mfl.getstorage(dir)
303 mf = mfl.getstorage(dir)
304
304
305 if not dir:
305 if not dir:
306 self.ui.status(_(b"checking manifests\n"))
306 self.ui.status(_(b"checking manifests\n"))
307
307
308 filenodes = {}
308 filenodes = {}
309 subdirnodes = {}
309 subdirnodes = {}
310 seen = {}
310 seen = {}
311 label = b"manifest"
311 label = b"manifest"
312 if dir:
312 if dir:
313 label = dir
313 label = dir
314 revlogfiles = mf.files()
314 revlogfiles = mf.files()
315 storefiles.difference_update(revlogfiles)
315 storefiles.difference_update(revlogfiles)
316 if subdirprogress: # should be true since we're in a subdirectory
316 if subdirprogress: # should be true since we're in a subdirectory
317 subdirprogress.increment()
317 subdirprogress.increment()
318 if self.refersmf:
318 if self.refersmf:
319 # Do not check manifest if there are only changelog entries with
319 # Do not check manifest if there are only changelog entries with
320 # null manifests.
320 # null manifests.
321 self._checkrevlog(mf._revlog, label, 0)
321 self._checkrevlog(mf._revlog, label, 0)
322 progress = ui.makeprogress(
322 progress = ui.makeprogress(
323 _(b'checking'), unit=_(b'manifests'), total=len(mf)
323 _(b'checking'), unit=_(b'manifests'), total=len(mf)
324 )
324 )
325 for i in mf:
325 for i in mf:
326 if not dir:
326 if not dir:
327 progress.update(i)
327 progress.update(i)
328 n = mf.node(i)
328 n = mf.node(i)
329 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
329 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
330 if n in mflinkrevs:
330 if n in mflinkrevs:
331 del mflinkrevs[n]
331 del mflinkrevs[n]
332 elif dir:
332 elif dir:
333 self._err(
333 self._err(
334 lr,
334 lr,
335 _(b"%s not in parent-directory manifest") % short(n),
335 _(b"%s not in parent-directory manifest") % short(n),
336 label,
336 label,
337 )
337 )
338 else:
338 else:
339 self._err(lr, _(b"%s not in changesets") % short(n), label)
339 self._err(lr, _(b"%s not in changesets") % short(n), label)
340
340
341 try:
341 try:
342 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
342 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
343 for f, fn, fl in mfdelta.iterentries():
343 for f, fn, fl in mfdelta.iterentries():
344 if not f:
344 if not f:
345 self._err(lr, _(b"entry without name in manifest"))
345 self._err(lr, _(b"entry without name in manifest"))
346 elif f == b"/dev/null": # ignore this in very old repos
346 elif f == b"/dev/null": # ignore this in very old repos
347 continue
347 continue
348 fullpath = dir + _normpath(f)
348 fullpath = dir + _normpath(f)
349 if fl == b't':
349 if fl == b't':
350 if not match.visitdir(fullpath):
350 if not match.visitdir(fullpath):
351 continue
351 continue
352 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
352 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
353 fn, []
353 fn, []
354 ).append(lr)
354 ).append(lr)
355 else:
355 else:
356 if not match(fullpath):
356 if not match(fullpath):
357 continue
357 continue
358 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
358 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
359 except Exception as inst:
359 except Exception as inst:
360 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
360 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
361 if self._level >= VERIFY_FULL:
361 if self._level >= VERIFY_FULL:
362 try:
362 try:
363 # Various issues can affect manifest. So we read each full
363 # Various issues can affect manifest. So we read each full
364 # text from storage. This triggers the checks from the core
364 # text from storage. This triggers the checks from the core
365 # code (eg: hash verification, filename are ordered, etc.)
365 # code (eg: hash verification, filename are ordered, etc.)
366 mfdelta = mfl.get(dir, n).read()
366 mfdelta = mfl.get(dir, n).read()
367 except Exception as inst:
367 except Exception as inst:
368 self._exc(
368 self._exc(
369 lr,
369 lr,
370 _(b"reading full manifest %s") % short(n),
370 _(b"reading full manifest %s") % short(n),
371 inst,
371 inst,
372 label,
372 label,
373 )
373 )
374
374
375 if not dir:
375 if not dir:
376 progress.complete()
376 progress.complete()
377
377
378 if self.havemf:
378 if self.havemf:
379 # since we delete entry in `mflinkrevs` during iteration, any
379 # since we delete entry in `mflinkrevs` during iteration, any
380 # remaining entries are "missing". We need to issue errors for them.
380 # remaining entries are "missing". We need to issue errors for them.
381 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
381 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
382 for c, m in sorted(changesetpairs):
382 for c, m in sorted(changesetpairs):
383 if dir:
383 if dir:
384 self._err(
384 self._err(
385 c,
385 c,
386 _(
386 _(
387 b"parent-directory manifest refers to unknown"
387 b"parent-directory manifest refers to unknown"
388 b" revision %s"
388 b" revision %s"
389 )
389 )
390 % short(m),
390 % short(m),
391 label,
391 label,
392 )
392 )
393 else:
393 else:
394 self._err(
394 self._err(
395 c,
395 c,
396 _(b"changeset refers to unknown revision %s")
396 _(b"changeset refers to unknown revision %s")
397 % short(m),
397 % short(m),
398 label,
398 label,
399 )
399 )
400
400
401 if not dir and subdirnodes:
401 if not dir and subdirnodes:
402 self.ui.status(_(b"checking directory manifests\n"))
402 self.ui.status(_(b"checking directory manifests\n"))
403 storefiles = set()
403 storefiles = set()
404 subdirs = set()
404 subdirs = set()
405 revlogv1 = self.revlogv1
405 revlogv1 = self.revlogv1
406 for t, f, f2, size in repo.store.datafiles():
406 for t, f, f2, size in repo.store.datafiles():
407 if not f:
407 if not f:
408 self._err(None, _(b"cannot decode filename '%s'") % f2)
408 self._err(None, _(b"cannot decode filename '%s'") % f2)
409 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
409 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
410 storefiles.add(_normpath(f))
410 storefiles.add(_normpath(f))
411 subdirs.add(os.path.dirname(f))
411 subdirs.add(os.path.dirname(f))
412 subdirprogress = ui.makeprogress(
412 subdirprogress = ui.makeprogress(
413 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
413 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
414 )
414 )
415
415
416 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
416 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
417 subdirfilenodes = self._verifymanifest(
417 subdirfilenodes = self._verifymanifest(
418 linkrevs, subdir, storefiles, subdirprogress
418 linkrevs, subdir, storefiles, subdirprogress
419 )
419 )
420 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
420 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
421 filenodes.setdefault(f, {}).update(onefilenodes)
421 filenodes.setdefault(f, {}).update(onefilenodes)
422
422
423 if not dir and subdirnodes:
423 if not dir and subdirnodes:
424 assert subdirprogress is not None # help pytype
424 assert subdirprogress is not None # help pytype
425 subdirprogress.complete()
425 subdirprogress.complete()
426 if self.warnorphanstorefiles:
426 if self.warnorphanstorefiles:
427 for f in sorted(storefiles):
427 for f in sorted(storefiles):
428 self._warn(_(b"warning: orphan data file '%s'") % f)
428 self._warn(_(b"warning: orphan data file '%s'") % f)
429
429
430 return filenodes
430 return filenodes
431
431
432 def _crosscheckfiles(self, filelinkrevs, filenodes):
432 def _crosscheckfiles(self, filelinkrevs, filenodes):
433 repo = self.repo
433 repo = self.repo
434 ui = self.ui
434 ui = self.ui
435 ui.status(_(b"crosschecking files in changesets and manifests\n"))
435 ui.status(_(b"crosschecking files in changesets and manifests\n"))
436
436
437 total = len(filelinkrevs) + len(filenodes)
437 total = len(filelinkrevs) + len(filenodes)
438 progress = ui.makeprogress(
438 progress = ui.makeprogress(
439 _(b'crosschecking'), unit=_(b'files'), total=total
439 _(b'crosschecking'), unit=_(b'files'), total=total
440 )
440 )
441 if self.havemf:
441 if self.havemf:
442 for f in sorted(filelinkrevs):
442 for f in sorted(filelinkrevs):
443 progress.increment()
443 progress.increment()
444 if f not in filenodes:
444 if f not in filenodes:
445 lr = filelinkrevs[f][0]
445 lr = filelinkrevs[f][0]
446 self._err(lr, _(b"in changeset but not in manifest"), f)
446 self._err(lr, _(b"in changeset but not in manifest"), f)
447
447
448 if self.havecl:
448 if self.havecl:
449 for f in sorted(filenodes):
449 for f in sorted(filenodes):
450 progress.increment()
450 progress.increment()
451 if f not in filelinkrevs:
451 if f not in filelinkrevs:
452 try:
452 try:
453 fl = repo.file(f)
453 fl = repo.file(f)
454 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
454 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
455 except Exception:
455 except Exception:
456 lr = None
456 lr = None
457 self._err(lr, _(b"in manifest but not in changeset"), f)
457 self._err(lr, _(b"in manifest but not in changeset"), f)
458
458
459 progress.complete()
459 progress.complete()
460
460
461 def _verifyfiles(self, filenodes, filelinkrevs):
461 def _verifyfiles(self, filenodes, filelinkrevs):
462 repo = self.repo
462 repo = self.repo
463 ui = self.ui
463 ui = self.ui
464 lrugetctx = self.lrugetctx
464 lrugetctx = self.lrugetctx
465 revlogv1 = self.revlogv1
465 revlogv1 = self.revlogv1
466 havemf = self.havemf
466 havemf = self.havemf
467 ui.status(_(b"checking files\n"))
467 ui.status(_(b"checking files\n"))
468
468
469 storefiles = set()
469 storefiles = set()
470 for rl_type, f, f2, size in repo.store.datafiles():
470 for rl_type, f, f2, size in repo.store.datafiles():
471 if not f:
471 if not f:
472 self._err(None, _(b"cannot decode filename '%s'") % f2)
472 self._err(None, _(b"cannot decode filename '%s'") % f2)
473 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
473 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
474 storefiles.add(_normpath(f))
474 storefiles.add(_normpath(f))
475
475
476 state = {
476 state = {
477 # TODO this assumes revlog storage for changelog.
477 # TODO this assumes revlog storage for changelog.
478 b'expectedversion': self.repo.changelog._format_version,
478 b'expectedversion': self.repo.changelog._format_version,
479 b'skipflags': self.skipflags,
479 b'skipflags': self.skipflags,
480 # experimental config: censor.policy
480 # experimental config: censor.policy
481 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
481 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
482 }
482 }
483
483
484 files = sorted(set(filenodes) | set(filelinkrevs))
484 files = sorted(set(filenodes) | set(filelinkrevs))
485 revisions = 0
485 revisions = 0
486 progress = ui.makeprogress(
486 progress = ui.makeprogress(
487 _(b'checking'), unit=_(b'files'), total=len(files)
487 _(b'checking'), unit=_(b'files'), total=len(files)
488 )
488 )
489 for i, f in enumerate(files):
489 for i, f in enumerate(files):
490 progress.update(i, item=f)
490 progress.update(i, item=f)
491 try:
491 try:
492 linkrevs = filelinkrevs[f]
492 linkrevs = filelinkrevs[f]
493 except KeyError:
493 except KeyError:
494 # in manifest but not in changelog
494 # in manifest but not in changelog
495 linkrevs = []
495 linkrevs = []
496
496
497 if linkrevs:
497 if linkrevs:
498 lr = linkrevs[0]
498 lr = linkrevs[0]
499 else:
499 else:
500 lr = None
500 lr = None
501
501
502 try:
502 try:
503 fl = repo.file(f)
503 fl = repo.file(f)
504 except error.StorageError as e:
504 except error.StorageError as e:
505 self._err(lr, _(b"broken revlog! (%s)") % e, f)
505 self._err(lr, _(b"broken revlog! (%s)") % e, f)
506 continue
506 continue
507
507
508 for ff in fl.files():
508 for ff in fl.files():
509 try:
509 try:
510 storefiles.remove(ff)
510 storefiles.remove(ff)
511 except KeyError:
511 except KeyError:
512 if self.warnorphanstorefiles:
512 if self.warnorphanstorefiles:
513 self._warn(
513 self._warn(
514 _(b" warning: revlog '%s' not in fncache!") % ff
514 _(b" warning: revlog '%s' not in fncache!") % ff
515 )
515 )
516 self.fncachewarned = True
516 self.fncachewarned = True
517
517
518 if not len(fl) and (self.havecl or self.havemf):
518 if not len(fl) and (self.havecl or self.havemf):
519 self._err(lr, _(b"empty or missing %s") % f)
519 self._err(lr, _(b"empty or missing %s") % f)
520 else:
520 else:
521 # Guard against implementations not setting this.
521 # Guard against implementations not setting this.
522 state[b'skipread'] = set()
522 state[b'skipread'] = set()
523 state[b'safe_renamed'] = set()
523 state[b'safe_renamed'] = set()
524
524
525 for problem in fl.verifyintegrity(state):
525 for problem in fl.verifyintegrity(state):
526 if problem.node is not None:
526 if problem.node is not None:
527 linkrev = fl.linkrev(fl.rev(problem.node))
527 linkrev = fl.linkrev(fl.rev(problem.node))
528 else:
528 else:
529 linkrev = None
529 linkrev = None
530
530
531 if problem.warning:
531 if problem.warning:
532 self._warn(problem.warning)
532 self._warn(problem.warning)
533 elif problem.error:
533 elif problem.error:
534 self._err(
534 self._err(
535 linkrev if linkrev is not None else lr,
535 linkrev if linkrev is not None else lr,
536 problem.error,
536 problem.error,
537 f,
537 f,
538 )
538 )
539 else:
539 else:
540 raise error.ProgrammingError(
540 raise error.ProgrammingError(
541 b'problem instance does not set warning or error '
541 b'problem instance does not set warning or error '
542 b'attribute: %s' % problem.msg
542 b'attribute: %s' % problem.msg
543 )
543 )
544
544
545 seen = {}
545 seen = {}
546 for i in fl:
546 for i in fl:
547 revisions += 1
547 revisions += 1
548 n = fl.node(i)
548 n = fl.node(i)
549 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
549 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
550 if f in filenodes:
550 if f in filenodes:
551 if havemf and n not in filenodes[f]:
551 if havemf and n not in filenodes[f]:
552 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
552 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
553 else:
553 else:
554 del filenodes[f][n]
554 del filenodes[f][n]
555
555
556 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
556 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
557 continue
557 continue
558
558
559 # check renames
559 # check renames
560 try:
560 try:
561 # This requires resolving fulltext (at least on revlogs,
561 # This requires resolving fulltext (at least on revlogs,
562 # though not with LFS revisions). We may want
562 # though not with LFS revisions). We may want
563 # ``verifyintegrity()`` to pass a set of nodes with
563 # ``verifyintegrity()`` to pass a set of nodes with
564 # rename metadata as an optimization.
564 # rename metadata as an optimization.
565 rp = fl.renamed(n)
565 rp = fl.renamed(n)
566 if rp:
566 if rp:
567 if lr is not None and ui.verbose:
567 if lr is not None and ui.verbose:
568 ctx = lrugetctx(lr)
568 ctx = lrugetctx(lr)
569 if not any(rp[0] in pctx for pctx in ctx.parents()):
569 if not any(rp[0] in pctx for pctx in ctx.parents()):
570 self._warn(
570 self._warn(
571 _(
571 _(
572 b"warning: copy source of '%s' not"
572 b"warning: copy source of '%s' not"
573 b" in parents of %s"
573 b" in parents of %s"
574 )
574 )
575 % (f, ctx)
575 % (f, ctx)
576 )
576 )
577 fl2 = repo.file(rp[0])
577 fl2 = repo.file(rp[0])
578 if not len(fl2):
578 if not len(fl2):
579 self._err(
579 self._err(
580 lr,
580 lr,
581 _(
581 _(
582 b"empty or missing copy source revlog "
582 b"empty or missing copy source revlog "
583 b"%s:%s"
583 b"%s:%s"
584 )
584 )
585 % (rp[0], short(rp[1])),
585 % (rp[0], short(rp[1])),
586 f,
586 f,
587 )
587 )
588 elif rp[1] == self.repo.nullid:
588 elif rp[1] == self.repo.nullid:
589 ui.note(
589 ui.note(
590 _(
590 _(
591 b"warning: %s@%s: copy source"
591 b"warning: %s@%s: copy source"
592 b" revision is nullid %s:%s\n"
592 b" revision is nullid %s:%s\n"
593 )
593 )
594 % (f, lr, rp[0], short(rp[1]))
594 % (f, lr, rp[0], short(rp[1]))
595 )
595 )
596 else:
596 else:
597 fl2.rev(rp[1])
597 fl2.rev(rp[1])
598 except Exception as inst:
598 except Exception as inst:
599 self._exc(
599 self._exc(
600 lr, _(b"checking rename of %s") % short(n), inst, f
600 lr, _(b"checking rename of %s") % short(n), inst, f
601 )
601 )
602
602
603 # cross-check
603 # cross-check
604 if f in filenodes:
604 if f in filenodes:
605 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
605 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
606 for lr, node in sorted(fns):
606 for lr, node in sorted(fns):
607 self._err(
607 self._err(
608 lr,
608 lr,
609 _(b"manifest refers to unknown revision %s")
609 _(b"manifest refers to unknown revision %s")
610 % short(node),
610 % short(node),
611 f,
611 f,
612 )
612 )
613 progress.complete()
613 progress.complete()
614
614
615 if self.warnorphanstorefiles:
615 if self.warnorphanstorefiles:
616 for f in sorted(storefiles):
616 for f in sorted(storefiles):
617 self._warn(_(b"warning: orphan data file '%s'") % f)
617 self._warn(_(b"warning: orphan data file '%s'") % f)
618
618
619 return len(files), revisions
619 return len(files), revisions
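For context on how this module is driven, the public entry point is the verify() function at the top of the file: it takes the store lock, instantiates the verifier class, runs every check while printing issues as they are found, and returns 1 if any integrity error was encountered. A usage sketch follows, assuming an already-open localrepository object; the check_repository() wrapper is hypothetical.

from mercurial import verify


def check_repository(repo, full=False):
    # VERIFY_FULL additionally re-reads every full manifest text, which
    # triggers the core revlog checks (hash verification, entry ordering).
    level = verify.VERIFY_FULL if full else verify.VERIFY_DEFAULT
    # verify() acquires repo.lock(), runs all checks while reporting issues
    # through repo.ui, and returns 1 on any integrity error, 0 otherwise.
    return verify.verify(repo, level=level)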