verify: use some intermediate variables instead of a multi-liner...
marmoute
r48153:80c690bf default
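
The change itself is small: in `_verifymanifest`, the error for a changeset that refers to an unknown manifest revision is now built with intermediate variables instead of a single multi-line call. Summarized from the hunk below:

Before:
                    self._err(
                        c,
                        _(b"changeset refers to unknown revision %s")
                        % short(m),
                        label,
                    )

After:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)
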
@@ -1,606 +1,603 @@
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import short
from .utils import stringutil

from . import (
    error,
    pycompat,
    revlog,
    util,
)

VERIFY_DEFAULT = 0
VERIFY_FULL = 1


def verify(repo, level=None):
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()


def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f


HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)


class verifier(object):
    def __init__(self, repo, level=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj: the source revlog
        - i: the revision number
        - node: the revision node id
        - seen: nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f: string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = []
                        for lr in linkrevs:
                            if self.lrugetctx(lr)[f].filenode() == node:
                                linkrevs.append(lr)
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
        - dir: a subdirectory to check (for tree manifest repo)
        - storefiles: set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node refered in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
-                    self._err(
-                        c,
-                        _(b"changeset refers to unknown revision %s")
-                        % short(m),
-                        label,
-                    )
+                    msg = _(b"changeset refers to unknown revision %s")
+                    msg %= short(m)
+                    self._err(c, msg, label)

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

            for subdir, linkrevs in pycompat.iteritems(subdirnodes):
                subdirfilenodes = self._verifymanifest(
                    linkrevs, subdir, storefiles, subdirprogress
                )
                for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                    filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(
                            _(b" warning: revlog '%s' not in fncache!") % ff
                        )
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
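
For context, a minimal sketch of how the module's entry point shown above can be driven from Python. This is not part of the changeset; the repository path is hypothetical, and the usual route is simply running "hg verify" (or "hg verify --full") on the command line.

    # Minimal sketch, assuming a local on-disk repository at the given path.
    from mercurial import hg, ui as uimod, verify

    u = uimod.ui.load()
    repo = hg.repository(u, b'/path/to/repo')   # hypothetical path
    rc = verify.verify(repo)                    # prints issues as they are found
    # verify() returns 1 if any integrity errors were reported, 0 otherwise.
    # Passing level=verify.VERIFY_FULL also re-reads every full manifest text.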