verify: use some intermediate variables instead of a multi-liner...
Author: marmoute
Changeset: r48149:0693dc0b (branch: default)
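The patch below replaces one multi-line `self._err(...)` call with two shorter statements, binding the formatted message to an intermediate variable first. As a minimal, self-contained sketch of that pattern (the `_`, `short`, and `_err` stubs here are hypothetical stand-ins for Mercurial's translation helper, node formatter, and error reporter, not the real implementations):

def _(msg):
    # stand-in for mercurial.i18n._ (translation lookup)
    return msg


def short(node):
    # stand-in for mercurial.node.short (abbreviate a node id)
    return node[:6]


def _err(linkrev, msg, label):
    # stand-in for verifier._err (issue reporting)
    print(linkrev, label, msg)


n, lr, label = b"0123456789abcdef", 42, b"manifest"

# Before: a single call spread over several lines.
_err(
    lr,
    _(b"%s not in parent-directory manifest") % short(n),
    label,
)

# After: bind the message first, then make a one-line call.
msg = _(b"%s not in parent-directory manifest") % short(n)
_err(lr, msg, label)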
@@ -1,618 +1,615 @@
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import short
from .utils import stringutil

from . import (
    error,
    pycompat,
    revlog,
    util,
)

VERIFY_DEFAULT = 0
VERIFY_FULL = 1


def verify(repo, level=None):
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()


def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f


HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)


class verifier(object):
    def __init__(self, repo, level=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj: the source revlog
        - i: the revision number
        - node: the revision node id
        - seen: nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f: string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = []
                        for lr in linkrevs:
                            if self.lrugetctx(lr)[f].filenode() == node:
                                linkrevs.append(lr)
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
        - dir: a subdirectory to check (for tree manifest repo)
        - storefiles: set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node refered in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
-                self._err(
-                    lr,
-                    _(b"%s not in parent-directory manifest") % short(n),
-                    label,
-                )
+                msg = _(b"%s not in parent-directory manifest") % short(n)
+                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
                            fn, []
                        ).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    self._exc(
                        lr,
                        _(b"reading full manifest %s") % short(n),
                        inst,
                        label,
                    )

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(
                        c,
                        _(
                            b"parent-directory manifest refers to unknown"
                            b" revision %s"
                        )
                        % short(m),
                        label,
                    )
                else:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

        for subdir, linkrevs in pycompat.iteritems(subdirnodes):
            subdirfilenodes = self._verifymanifest(
                linkrevs, subdir, storefiles, subdirprogress
            )
            for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions