verify: use some intermediate variables instead of a multi-liner...

marmoute - r48154:f39df554 default
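The change itself is a small readability refactor in _verifyfiles(): the fncache warning message is bound to an intermediate variable (msg) so the self._warn() call fits on a single line. Extracted from the hunk below, with nothing added beyond what the diff shows:

    # before
    self._warn(
        _(b" warning: revlog '%s' not in fncache!") % ff
    )

    # after
    msg = _(b" warning: revlog '%s' not in fncache!")
    self._warn(msg % ff)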
@@ -1,603 +1,602 @@
# verify.py - repository integrity checking for Mercurial
#
# Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
#
# This software may be used and distributed according to the terms of the
# GNU General Public License version 2 or any later version.

from __future__ import absolute_import

import os

from .i18n import _
from .node import short
from .utils import stringutil

from . import (
    error,
    pycompat,
    revlog,
    util,
)

VERIFY_DEFAULT = 0
VERIFY_FULL = 1


def verify(repo, level=None):
    with repo.lock():
        v = verifier(repo, level)
        return v.verify()


def _normpath(f):
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while b'//' in f:
        f = f.replace(b'//', b'/')
    return f


HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)


class verifier(object):
    def __init__(self, repo, level=None):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        if level is None:
            level = VERIFY_DEFAULT
        self._level = level
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint(b'verify', b'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + b"\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = b"%d" % linkrev
        else:
            linkrev = b'?'
        msg = b"%s: %s" % (linkrev, msg)
        if filename:
            msg = b"%s@%s" % (filename, msg)
        self.ui.warn(b" " + msg + b"\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = stringutil.forcebytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _(b"empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self._err(None, _(b"data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _(b"index contains %d extra bytes") % d[1], name)

        if obj._format_version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_(b"warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_(b"warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj: the source revlog
        - i: the revision number
        - node: the revision node id
        - seen: nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f: string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _(b"rev %d points to nonexistent changeset %d")
            else:
                msg = _(b"rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = []
                        for lr in linkrevs:
                            if self.lrugetctx(lr)[f].filenode() == node:
                                linkrevs.append(lr)
                    except Exception:
                        pass
                msg = _(b" (expected %s)")
                msg %= b" ".join(map(pycompat.bytestr, linkrevs))
                self._warn(msg)
            lr = None  # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != self.repo.nullid:
                msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
                self._err(lr, msg, f)
            if p2 not in seen and p2 != self.repo.nullid:
                msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
                self._err(lr, msg, f)
        except Exception as inst:
            self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith(b'file:'):
            raise error.Abort(_(b"cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin(b"journal")):
            ui.warn(_(b"abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(
                _(b"repository uses revlog format %d\n")
                % (self.revlogv1 and 1 or 0)
            )

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(
            _(b"checked %d changesets with %d changes to %d files\n")
            % (len(repo.changelog), filerevisions, totalfiles)
        )
        if self.warnings:
            ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
            if self.fncachewarned:
                ui.warn(HINT_FNCACHE)
        if self.errors:
            ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                msg = _(b"(first damaged changeset appears to be %d)\n")
                msg %= min(self.badrevs)
                ui.warn(msg)
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_(b"checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, b"changelog", 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'changesets'), total=len(repo)
        )
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], b"changelog")

            try:
                changes = cl.read(n)
                if changes[0] != self.repo.nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(
        self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
    ):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
        - dir: a subdirectory to check (for tree manifest repo)
        - storefiles: set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node refered in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_(b"checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = b"manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress:  # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf._revlog, label, 0)
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(mf)
        )
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                msg = _(b"%s not in parent-directory manifest") % short(n)
                self._err(lr, msg, label)
            else:
                self._err(lr, _(b"%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _(b"entry without name in manifest"))
                    elif f == b"/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == b't':
                        if not match.visitdir(fullpath):
                            continue
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
            if self._level >= VERIFY_FULL:
                try:
                    # Various issues can affect manifest. So we read each full
                    # text from storage. This triggers the checks from the core
                    # code (eg: hash verification, filename are ordered, etc.)
                    mfdelta = mfl.get(dir, n).read()
                except Exception as inst:
                    msg = _(b"reading full manifest %s") % short(n)
                    self._exc(lr, msg, inst, label)

        if not dir:
            progress.complete()

        if self.havemf:
            # since we delete entry in `mflinkrevs` during iteration, any
            # remaining entries are "missing". We need to issue errors for them.
            changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
            for c, m in sorted(changesetpairs):
                if dir:
                    self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
                else:
                    msg = _(b"changeset refers to unknown revision %s")
                    msg %= short(m)
                    self._err(c, msg, label)

        if not dir and subdirnodes:
            self.ui.status(_(b"checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for t, f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _(b"cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(
                _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
            )

            for subdir, linkrevs in pycompat.iteritems(subdirnodes):
                subdirfilenodes = self._verifymanifest(
                    linkrevs, subdir, storefiles, subdirprogress
                )
                for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
                    filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            assert subdirprogress is not None  # help pytype
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_(b"warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        repo = self.repo
        ui = self.ui
        ui.status(_(b"crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(
            _(b'crosschecking'), unit=_(b'files'), total=total
        )
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _(b"in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self._err(lr, _(b"in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_(b"checking files\n"))

        storefiles = set()
        for rl_type, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            b'expectedversion': self.repo.changelog._format_version,
            b'skipflags': self.skipflags,
            # experimental config: censor.policy
            b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(
            _(b'checking'), unit=_(b'files'), total=len(files)
        )
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _(b"broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
-                        self._warn(
-                            _(b" warning: revlog '%s' not in fncache!") % ff
-                        )
+                        msg = _(b" warning: revlog '%s' not in fncache!")
+                        self._warn(msg % ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _(b"empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state[b'skipread'] = set()
                state[b'safe_renamed'] = set()

                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(
                            linkrev if linkrev is not None else lr,
                            problem.error,
                            f,
                        )
                    else:
                        raise error.ProgrammingError(
                            b'problem instance does not set warning or error '
                            b'attribute: %s' % problem.msg
                        )

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _(b"%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state[b'skipread'] and n not in state[b'safe_renamed']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs,
                    # though not with LFS revisions). We may want
                    # ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(
                                    _(
                                        b"warning: copy source of '%s' not"
                                        b" in parents of %s"
                                    )
                                    % (f, ctx)
                                )
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(
                                lr,
                                _(
                                    b"empty or missing copy source revlog "
                                    b"%s:%s"
                                )
                                % (rp[0], short(rp[1])),
                                f,
                            )
                        elif rp[1] == self.repo.nullid:
                            ui.note(
                                _(
                                    b"warning: %s@%s: copy source"
                                    b" revision is nullid %s:%s\n"
                                )
                                % (f, lr, rp[0], short(rp[1]))
                            )
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(
                        lr, _(b"checking rename of %s") % short(n), inst, f
                    )

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
                for lr, node in sorted(fns):
                    self._err(
                        lr,
                        _(b"manifest refers to unknown revision %s")
                        % short(node),
                        f,
                    )
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

        return len(files), revisions
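For reviewers who want to exercise this code path, here is a minimal sketch of driving the verifier from the Python API. It is an illustration only: ui.ui.load() and hg.repository() are the usual Mercurial internals and are assumptions here, not taken from this changeset; the user-facing equivalent is simply running "hg verify" in the repository.

    # Minimal sketch, assuming the standard Mercurial internal API (not part of this changeset).
    from mercurial import hg, verify
    from mercurial import ui as uimod

    myui = uimod.ui.load()            # assumed helper to build a ui object
    repo = hg.repository(myui, b'.')  # open the local repository (bytes path)

    # verify.verify() takes the repo lock, runs the verifier class shown above,
    # and returns 0 when the repository is clean, 1 when integrity errors were found.
    ret = verify.verify(repo, level=verify.VERIFY_FULL)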