##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48152:9823b348 default
parent child Browse files
Show More
@@ -1,610 +1,606 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
41 HINT_FNCACHE = _(
41 HINT_FNCACHE = _(
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
42 b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
43 )
43 )
44
44
45 WARN_PARENT_DIR_UNKNOWN_REV = _(
46 b"parent-directory manifest refers to unknown revision %s"
47 )
48
45
49
46 class verifier(object):
50 class verifier(object):
47 def __init__(self, repo, level=None):
51 def __init__(self, repo, level=None):
48 self.repo = repo.unfiltered()
52 self.repo = repo.unfiltered()
49 self.ui = repo.ui
53 self.ui = repo.ui
50 self.match = repo.narrowmatch()
54 self.match = repo.narrowmatch()
51 if level is None:
55 if level is None:
52 level = VERIFY_DEFAULT
56 level = VERIFY_DEFAULT
53 self._level = level
57 self._level = level
54 self.badrevs = set()
58 self.badrevs = set()
55 self.errors = 0
59 self.errors = 0
56 self.warnings = 0
60 self.warnings = 0
57 self.havecl = len(repo.changelog) > 0
61 self.havecl = len(repo.changelog) > 0
58 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
62 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
59 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
63 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
60 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
64 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
61 self.refersmf = False
65 self.refersmf = False
62 self.fncachewarned = False
66 self.fncachewarned = False
63 # developer config: verify.skipflags
67 # developer config: verify.skipflags
64 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
68 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
65 self.warnorphanstorefiles = True
69 self.warnorphanstorefiles = True
66
70
67 def _warn(self, msg):
71 def _warn(self, msg):
68 """record a "warning" level issue"""
72 """record a "warning" level issue"""
69 self.ui.warn(msg + b"\n")
73 self.ui.warn(msg + b"\n")
70 self.warnings += 1
74 self.warnings += 1
71
75
72 def _err(self, linkrev, msg, filename=None):
76 def _err(self, linkrev, msg, filename=None):
73 """record a "error" level issue"""
77 """record a "error" level issue"""
74 if linkrev is not None:
78 if linkrev is not None:
75 self.badrevs.add(linkrev)
79 self.badrevs.add(linkrev)
76 linkrev = b"%d" % linkrev
80 linkrev = b"%d" % linkrev
77 else:
81 else:
78 linkrev = b'?'
82 linkrev = b'?'
79 msg = b"%s: %s" % (linkrev, msg)
83 msg = b"%s: %s" % (linkrev, msg)
80 if filename:
84 if filename:
81 msg = b"%s@%s" % (filename, msg)
85 msg = b"%s@%s" % (filename, msg)
82 self.ui.warn(b" " + msg + b"\n")
86 self.ui.warn(b" " + msg + b"\n")
83 self.errors += 1
87 self.errors += 1
84
88
85 def _exc(self, linkrev, msg, inst, filename=None):
89 def _exc(self, linkrev, msg, inst, filename=None):
86 """record exception raised during the verify process"""
90 """record exception raised during the verify process"""
87 fmsg = stringutil.forcebytestr(inst)
91 fmsg = stringutil.forcebytestr(inst)
88 if not fmsg:
92 if not fmsg:
89 fmsg = pycompat.byterepr(inst)
93 fmsg = pycompat.byterepr(inst)
90 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
94 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
91
95
92 def _checkrevlog(self, obj, name, linkrev):
96 def _checkrevlog(self, obj, name, linkrev):
93 """verify high level property of a revlog
97 """verify high level property of a revlog
94
98
95 - revlog is present,
99 - revlog is present,
96 - revlog is non-empty,
100 - revlog is non-empty,
97 - sizes (index and data) are correct,
101 - sizes (index and data) are correct,
98 - revlog's format version is correct.
102 - revlog's format version is correct.
99 """
103 """
100 if not len(obj) and (self.havecl or self.havemf):
104 if not len(obj) and (self.havecl or self.havemf):
101 self._err(linkrev, _(b"empty or missing %s") % name)
105 self._err(linkrev, _(b"empty or missing %s") % name)
102 return
106 return
103
107
104 d = obj.checksize()
108 d = obj.checksize()
105 if d[0]:
109 if d[0]:
106 self._err(None, _(b"data length off by %d bytes") % d[0], name)
110 self._err(None, _(b"data length off by %d bytes") % d[0], name)
107 if d[1]:
111 if d[1]:
108 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
112 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
109
113
110 if obj._format_version != revlog.REVLOGV0:
114 if obj._format_version != revlog.REVLOGV0:
111 if not self.revlogv1:
115 if not self.revlogv1:
112 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
116 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
113 elif self.revlogv1:
117 elif self.revlogv1:
114 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
118 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
115
119
116 def _checkentry(self, obj, i, node, seen, linkrevs, f):
120 def _checkentry(self, obj, i, node, seen, linkrevs, f):
117 """verify a single revlog entry
121 """verify a single revlog entry
118
122
119 arguments are:
123 arguments are:
120 - obj: the source revlog
124 - obj: the source revlog
121 - i: the revision number
125 - i: the revision number
122 - node: the revision node id
126 - node: the revision node id
123 - seen: nodes previously seen for this revlog
127 - seen: nodes previously seen for this revlog
124 - linkrevs: [changelog-revisions] introducing "node"
128 - linkrevs: [changelog-revisions] introducing "node"
125 - f: string label ("changelog", "manifest", or filename)
129 - f: string label ("changelog", "manifest", or filename)
126
130
127 Performs the following checks:
131 Performs the following checks:
128 - linkrev points to an existing changelog revision,
132 - linkrev points to an existing changelog revision,
129 - linkrev points to a changelog revision that introduces this revision,
133 - linkrev points to a changelog revision that introduces this revision,
130 - linkrev points to the lowest of these changesets,
134 - linkrev points to the lowest of these changesets,
131 - both parents exist in the revlog,
135 - both parents exist in the revlog,
132 - the revision is not duplicated.
136 - the revision is not duplicated.
133
137
134 Return the linkrev of the revision (or None for changelog's revisions).
138 Return the linkrev of the revision (or None for changelog's revisions).
135 """
139 """
136 lr = obj.linkrev(obj.rev(node))
140 lr = obj.linkrev(obj.rev(node))
137 if lr < 0 or (self.havecl and lr not in linkrevs):
141 if lr < 0 or (self.havecl and lr not in linkrevs):
138 if lr < 0 or lr >= len(self.repo.changelog):
142 if lr < 0 or lr >= len(self.repo.changelog):
139 msg = _(b"rev %d points to nonexistent changeset %d")
143 msg = _(b"rev %d points to nonexistent changeset %d")
140 else:
144 else:
141 msg = _(b"rev %d points to unexpected changeset %d")
145 msg = _(b"rev %d points to unexpected changeset %d")
142 self._err(None, msg % (i, lr), f)
146 self._err(None, msg % (i, lr), f)
143 if linkrevs:
147 if linkrevs:
144 if f and len(linkrevs) > 1:
148 if f and len(linkrevs) > 1:
145 try:
149 try:
146 # attempt to filter down to real linkrevs
150 # attempt to filter down to real linkrevs
147 linkrevs = []
151 linkrevs = []
148 for lr in linkrevs:
152 for lr in linkrevs:
149 if self.lrugetctx(lr)[f].filenode() == node:
153 if self.lrugetctx(lr)[f].filenode() == node:
150 linkrevs.append(lr)
154 linkrevs.append(lr)
151 except Exception:
155 except Exception:
152 pass
156 pass
153 msg = _(b" (expected %s)")
157 msg = _(b" (expected %s)")
154 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
158 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
155 self._warn(msg)
159 self._warn(msg)
156 lr = None # can't be trusted
160 lr = None # can't be trusted
157
161
158 try:
162 try:
159 p1, p2 = obj.parents(node)
163 p1, p2 = obj.parents(node)
160 if p1 not in seen and p1 != self.repo.nullid:
164 if p1 not in seen and p1 != self.repo.nullid:
161 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
165 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
162 self._err(lr, msg, f)
166 self._err(lr, msg, f)
163 if p2 not in seen and p2 != self.repo.nullid:
167 if p2 not in seen and p2 != self.repo.nullid:
164 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
168 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
165 self._err(lr, msg, f)
169 self._err(lr, msg, f)
166 except Exception as inst:
170 except Exception as inst:
167 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
171 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
168
172
169 if node in seen:
173 if node in seen:
170 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
174 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
171 seen[node] = i
175 seen[node] = i
172 return lr
176 return lr
173
177
174 def verify(self):
178 def verify(self):
175 """verify the content of the Mercurial repository
179 """verify the content of the Mercurial repository
176
180
177 This method runs all verifications, displaying issues as they are found.
181 This method runs all verifications, displaying issues as they are found.
178
182
179 return 1 if any errors have been encountered, 0 otherwise."""
183 return 1 if any errors have been encountered, 0 otherwise."""
180 # initial validation and generic report
184 # initial validation and generic report
181 repo = self.repo
185 repo = self.repo
182 ui = repo.ui
186 ui = repo.ui
183 if not repo.url().startswith(b'file:'):
187 if not repo.url().startswith(b'file:'):
184 raise error.Abort(_(b"cannot verify bundle or remote repos"))
188 raise error.Abort(_(b"cannot verify bundle or remote repos"))
185
189
186 if os.path.exists(repo.sjoin(b"journal")):
190 if os.path.exists(repo.sjoin(b"journal")):
187 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
191 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
188
192
189 if ui.verbose or not self.revlogv1:
193 if ui.verbose or not self.revlogv1:
190 ui.status(
194 ui.status(
191 _(b"repository uses revlog format %d\n")
195 _(b"repository uses revlog format %d\n")
192 % (self.revlogv1 and 1 or 0)
196 % (self.revlogv1 and 1 or 0)
193 )
197 )
194
198
195 # data verification
199 # data verification
196 mflinkrevs, filelinkrevs = self._verifychangelog()
200 mflinkrevs, filelinkrevs = self._verifychangelog()
197 filenodes = self._verifymanifest(mflinkrevs)
201 filenodes = self._verifymanifest(mflinkrevs)
198 del mflinkrevs
202 del mflinkrevs
199 self._crosscheckfiles(filelinkrevs, filenodes)
203 self._crosscheckfiles(filelinkrevs, filenodes)
200 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
204 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
201
205
202 # final report
206 # final report
203 ui.status(
207 ui.status(
204 _(b"checked %d changesets with %d changes to %d files\n")
208 _(b"checked %d changesets with %d changes to %d files\n")
205 % (len(repo.changelog), filerevisions, totalfiles)
209 % (len(repo.changelog), filerevisions, totalfiles)
206 )
210 )
207 if self.warnings:
211 if self.warnings:
208 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
212 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
209 if self.fncachewarned:
213 if self.fncachewarned:
210 ui.warn(HINT_FNCACHE)
214 ui.warn(HINT_FNCACHE)
211 if self.errors:
215 if self.errors:
212 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
216 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
213 if self.badrevs:
217 if self.badrevs:
214 msg = _(b"(first damaged changeset appears to be %d)\n")
218 msg = _(b"(first damaged changeset appears to be %d)\n")
215 msg %= min(self.badrevs)
219 msg %= min(self.badrevs)
216 ui.warn(msg)
220 ui.warn(msg)
217 return 1
221 return 1
218 return 0
222 return 0
219
223
220 def _verifychangelog(self):
224 def _verifychangelog(self):
221 """verify the changelog of a repository
225 """verify the changelog of a repository
222
226
223 The following checks are performed:
227 The following checks are performed:
224 - all of `_checkrevlog` checks,
228 - all of `_checkrevlog` checks,
225 - all of `_checkentry` checks (for each revisions),
225 - all of `_checkentry` checks (for each revision),
229 - all of `_checkentry` checks (for each revision),
230 - each revision can be read.
227
231
228 The function returns some of the data observed in the changesets as a
232 The function returns some of the data observed in the changesets as a
229 (mflinkrevs, filelinkrevs) tuples:
233 (mflinkrevs, filelinkrevs) tuples:
230 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
234 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
231 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
235 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
232
236
233 If a matcher was specified, filelinkrevs will only contains matched
237 If a matcher was specified, filelinkrevs will only contains matched
234 files.
238 files.
235 """
239 """
236 ui = self.ui
240 ui = self.ui
237 repo = self.repo
241 repo = self.repo
238 match = self.match
242 match = self.match
239 cl = repo.changelog
243 cl = repo.changelog
240
244
241 ui.status(_(b"checking changesets\n"))
245 ui.status(_(b"checking changesets\n"))
242 mflinkrevs = {}
246 mflinkrevs = {}
243 filelinkrevs = {}
247 filelinkrevs = {}
244 seen = {}
248 seen = {}
245 self._checkrevlog(cl, b"changelog", 0)
249 self._checkrevlog(cl, b"changelog", 0)
246 progress = ui.makeprogress(
250 progress = ui.makeprogress(
247 _(b'checking'), unit=_(b'changesets'), total=len(repo)
251 _(b'checking'), unit=_(b'changesets'), total=len(repo)
248 )
252 )
249 for i in repo:
253 for i in repo:
250 progress.update(i)
254 progress.update(i)
251 n = cl.node(i)
255 n = cl.node(i)
252 self._checkentry(cl, i, n, seen, [i], b"changelog")
256 self._checkentry(cl, i, n, seen, [i], b"changelog")
253
257
254 try:
258 try:
255 changes = cl.read(n)
259 changes = cl.read(n)
256 if changes[0] != self.repo.nullid:
260 if changes[0] != self.repo.nullid:
257 mflinkrevs.setdefault(changes[0], []).append(i)
261 mflinkrevs.setdefault(changes[0], []).append(i)
258 self.refersmf = True
262 self.refersmf = True
259 for f in changes[3]:
263 for f in changes[3]:
260 if match(f):
264 if match(f):
261 filelinkrevs.setdefault(_normpath(f), []).append(i)
265 filelinkrevs.setdefault(_normpath(f), []).append(i)
262 except Exception as inst:
266 except Exception as inst:
263 self.refersmf = True
267 self.refersmf = True
264 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
268 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
265 progress.complete()
269 progress.complete()
266 return mflinkrevs, filelinkrevs
270 return mflinkrevs, filelinkrevs
267
271
268 def _verifymanifest(
272 def _verifymanifest(
269 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
273 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
270 ):
274 ):
271 """verify the manifestlog content
275 """verify the manifestlog content
272
276
273 Inputs:
277 Inputs:
274 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
278 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
275 - dir: a subdirectory to check (for tree manifest repo)
279 - dir: a subdirectory to check (for tree manifest repo)
276 - storefiles: set of currently "orphan" files.
280 - storefiles: set of currently "orphan" files.
277 - subdirprogress: a progress object
281 - subdirprogress: a progress object
278
282
279 This function checks:
283 This function checks:
280 * all of `_checkrevlog` checks (for all manifest related revlogs)
284 * all of `_checkrevlog` checks (for all manifest related revlogs)
281 * all of `_checkentry` checks (for all manifest related revisions)
285 * all of `_checkentry` checks (for all manifest related revisions)
282 * nodes for subdirectory exists in the sub-directory manifest
286 * nodes for subdirectory exists in the sub-directory manifest
283 * each manifest entries have a file path
283 * each manifest entry has a file path
287 * each manifest entry has a file path
284 * each manifest node referred in mflinkrevs exists in the manifest log
288 * each manifest node referred in mflinkrevs exists in the manifest log
289
286 If tree manifest is in use and a matcher is specified, only the
290 If tree manifest is in use and a matcher is specified, only the
287 sub-directories matching it will be verified.
291 sub-directories matching it will be verified.
288
292
289 return a two level mapping:
293 return a two level mapping:
290 {"path" -> { filenode -> changelog-revision}}
294 {"path" -> { filenode -> changelog-revision}}
291
295
292 This mapping primarily contains entries for every files in the
296 This mapping primarily contains entries for every files in the
293 repository. In addition, when tree-manifest is used, it also contains
297 repository. In addition, when tree-manifest is used, it also contains
294 sub-directory entries.
298 sub-directory entries.
295
299
296 If a matcher is provided, only matching paths will be included.
300 If a matcher is provided, only matching paths will be included.
297 """
301 """
298 repo = self.repo
302 repo = self.repo
299 ui = self.ui
303 ui = self.ui
300 match = self.match
304 match = self.match
301 mfl = self.repo.manifestlog
305 mfl = self.repo.manifestlog
302 mf = mfl.getstorage(dir)
306 mf = mfl.getstorage(dir)
303
307
304 if not dir:
308 if not dir:
305 self.ui.status(_(b"checking manifests\n"))
309 self.ui.status(_(b"checking manifests\n"))
306
310
307 filenodes = {}
311 filenodes = {}
308 subdirnodes = {}
312 subdirnodes = {}
309 seen = {}
313 seen = {}
310 label = b"manifest"
314 label = b"manifest"
311 if dir:
315 if dir:
312 label = dir
316 label = dir
313 revlogfiles = mf.files()
317 revlogfiles = mf.files()
314 storefiles.difference_update(revlogfiles)
318 storefiles.difference_update(revlogfiles)
315 if subdirprogress: # should be true since we're in a subdirectory
319 if subdirprogress: # should be true since we're in a subdirectory
316 subdirprogress.increment()
320 subdirprogress.increment()
317 if self.refersmf:
321 if self.refersmf:
318 # Do not check manifest if there are only changelog entries with
322 # Do not check manifest if there are only changelog entries with
319 # null manifests.
323 # null manifests.
320 self._checkrevlog(mf._revlog, label, 0)
324 self._checkrevlog(mf._revlog, label, 0)
321 progress = ui.makeprogress(
325 progress = ui.makeprogress(
322 _(b'checking'), unit=_(b'manifests'), total=len(mf)
326 _(b'checking'), unit=_(b'manifests'), total=len(mf)
323 )
327 )
324 for i in mf:
328 for i in mf:
325 if not dir:
329 if not dir:
326 progress.update(i)
330 progress.update(i)
327 n = mf.node(i)
331 n = mf.node(i)
328 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
332 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
329 if n in mflinkrevs:
333 if n in mflinkrevs:
330 del mflinkrevs[n]
334 del mflinkrevs[n]
331 elif dir:
335 elif dir:
332 msg = _(b"%s not in parent-directory manifest") % short(n)
336 msg = _(b"%s not in parent-directory manifest") % short(n)
333 self._err(lr, msg, label)
337 self._err(lr, msg, label)
334 else:
338 else:
335 self._err(lr, _(b"%s not in changesets") % short(n), label)
339 self._err(lr, _(b"%s not in changesets") % short(n), label)
336
340
337 try:
341 try:
338 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
342 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
339 for f, fn, fl in mfdelta.iterentries():
343 for f, fn, fl in mfdelta.iterentries():
340 if not f:
344 if not f:
341 self._err(lr, _(b"entry without name in manifest"))
345 self._err(lr, _(b"entry without name in manifest"))
342 elif f == b"/dev/null": # ignore this in very old repos
346 elif f == b"/dev/null": # ignore this in very old repos
343 continue
347 continue
344 fullpath = dir + _normpath(f)
348 fullpath = dir + _normpath(f)
345 if fl == b't':
349 if fl == b't':
346 if not match.visitdir(fullpath):
350 if not match.visitdir(fullpath):
347 continue
351 continue
348 sdn = subdirnodes.setdefault(fullpath + b'/', {})
352 sdn = subdirnodes.setdefault(fullpath + b'/', {})
349 sdn.setdefault(fn, []).append(lr)
353 sdn.setdefault(fn, []).append(lr)
350 else:
354 else:
351 if not match(fullpath):
355 if not match(fullpath):
352 continue
356 continue
353 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
357 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
354 except Exception as inst:
358 except Exception as inst:
355 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
359 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
356 if self._level >= VERIFY_FULL:
360 if self._level >= VERIFY_FULL:
357 try:
361 try:
358 # Various issues can affect manifest. So we read each full
362 # Various issues can affect manifest. So we read each full
359 # text from storage. This triggers the checks from the core
363 # text from storage. This triggers the checks from the core
360 # code (eg: hash verification, filename are ordered, etc.)
364 # code (eg: hash verification, filename are ordered, etc.)
361 mfdelta = mfl.get(dir, n).read()
365 mfdelta = mfl.get(dir, n).read()
362 except Exception as inst:
366 except Exception as inst:
363 msg = _(b"reading full manifest %s") % short(n)
367 msg = _(b"reading full manifest %s") % short(n)
364 self._exc(lr, msg, inst, label)
368 self._exc(lr, msg, inst, label)
365
369
366 if not dir:
370 if not dir:
367 progress.complete()
371 progress.complete()
368
372
369 if self.havemf:
373 if self.havemf:
370 # since we delete entry in `mflinkrevs` during iteration, any
374 # since we delete entry in `mflinkrevs` during iteration, any
371 # remaining entries are "missing". We need to issue errors for them.
375 # remaining entries are "missing". We need to issue errors for them.
372 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
376 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
373 for c, m in sorted(changesetpairs):
377 for c, m in sorted(changesetpairs):
374 if dir:
378 if dir:
375 self._err(
379 self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
376 c,
377 _(
378 b"parent-directory manifest refers to unknown"
379 b" revision %s"
380 )
381 % short(m),
382 label,
383 )
384 else:
380 else:
385 self._err(
381 self._err(
386 c,
382 c,
387 _(b"changeset refers to unknown revision %s")
383 _(b"changeset refers to unknown revision %s")
388 % short(m),
384 % short(m),
389 label,
385 label,
390 )
386 )
391
387
392 if not dir and subdirnodes:
388 if not dir and subdirnodes:
393 self.ui.status(_(b"checking directory manifests\n"))
389 self.ui.status(_(b"checking directory manifests\n"))
394 storefiles = set()
390 storefiles = set()
395 subdirs = set()
391 subdirs = set()
396 revlogv1 = self.revlogv1
392 revlogv1 = self.revlogv1
397 for t, f, f2, size in repo.store.datafiles():
393 for t, f, f2, size in repo.store.datafiles():
398 if not f:
394 if not f:
399 self._err(None, _(b"cannot decode filename '%s'") % f2)
395 self._err(None, _(b"cannot decode filename '%s'") % f2)
400 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
396 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
401 storefiles.add(_normpath(f))
397 storefiles.add(_normpath(f))
402 subdirs.add(os.path.dirname(f))
398 subdirs.add(os.path.dirname(f))
403 subdirprogress = ui.makeprogress(
399 subdirprogress = ui.makeprogress(
404 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
400 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
405 )
401 )
406
402
407 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
403 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
408 subdirfilenodes = self._verifymanifest(
404 subdirfilenodes = self._verifymanifest(
409 linkrevs, subdir, storefiles, subdirprogress
405 linkrevs, subdir, storefiles, subdirprogress
410 )
406 )
411 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
407 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
412 filenodes.setdefault(f, {}).update(onefilenodes)
408 filenodes.setdefault(f, {}).update(onefilenodes)
413
409
414 if not dir and subdirnodes:
410 if not dir and subdirnodes:
415 assert subdirprogress is not None # help pytype
411 assert subdirprogress is not None # help pytype
416 subdirprogress.complete()
412 subdirprogress.complete()
417 if self.warnorphanstorefiles:
413 if self.warnorphanstorefiles:
418 for f in sorted(storefiles):
414 for f in sorted(storefiles):
419 self._warn(_(b"warning: orphan data file '%s'") % f)
415 self._warn(_(b"warning: orphan data file '%s'") % f)
420
416
421 return filenodes
417 return filenodes
422
418
423 def _crosscheckfiles(self, filelinkrevs, filenodes):
419 def _crosscheckfiles(self, filelinkrevs, filenodes):
424 repo = self.repo
420 repo = self.repo
425 ui = self.ui
421 ui = self.ui
426 ui.status(_(b"crosschecking files in changesets and manifests\n"))
422 ui.status(_(b"crosschecking files in changesets and manifests\n"))
427
423
428 total = len(filelinkrevs) + len(filenodes)
424 total = len(filelinkrevs) + len(filenodes)
429 progress = ui.makeprogress(
425 progress = ui.makeprogress(
430 _(b'crosschecking'), unit=_(b'files'), total=total
426 _(b'crosschecking'), unit=_(b'files'), total=total
431 )
427 )
432 if self.havemf:
428 if self.havemf:
433 for f in sorted(filelinkrevs):
429 for f in sorted(filelinkrevs):
434 progress.increment()
430 progress.increment()
435 if f not in filenodes:
431 if f not in filenodes:
436 lr = filelinkrevs[f][0]
432 lr = filelinkrevs[f][0]
437 self._err(lr, _(b"in changeset but not in manifest"), f)
433 self._err(lr, _(b"in changeset but not in manifest"), f)
438
434
439 if self.havecl:
435 if self.havecl:
440 for f in sorted(filenodes):
436 for f in sorted(filenodes):
441 progress.increment()
437 progress.increment()
442 if f not in filelinkrevs:
438 if f not in filelinkrevs:
443 try:
439 try:
444 fl = repo.file(f)
440 fl = repo.file(f)
445 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
441 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
446 except Exception:
442 except Exception:
447 lr = None
443 lr = None
448 self._err(lr, _(b"in manifest but not in changeset"), f)
444 self._err(lr, _(b"in manifest but not in changeset"), f)
449
445
450 progress.complete()
446 progress.complete()
451
447
452 def _verifyfiles(self, filenodes, filelinkrevs):
448 def _verifyfiles(self, filenodes, filelinkrevs):
453 repo = self.repo
449 repo = self.repo
454 ui = self.ui
450 ui = self.ui
455 lrugetctx = self.lrugetctx
451 lrugetctx = self.lrugetctx
456 revlogv1 = self.revlogv1
452 revlogv1 = self.revlogv1
457 havemf = self.havemf
453 havemf = self.havemf
458 ui.status(_(b"checking files\n"))
454 ui.status(_(b"checking files\n"))
459
455
460 storefiles = set()
456 storefiles = set()
461 for rl_type, f, f2, size in repo.store.datafiles():
457 for rl_type, f, f2, size in repo.store.datafiles():
462 if not f:
458 if not f:
463 self._err(None, _(b"cannot decode filename '%s'") % f2)
459 self._err(None, _(b"cannot decode filename '%s'") % f2)
464 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
460 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
465 storefiles.add(_normpath(f))
461 storefiles.add(_normpath(f))
466
462
467 state = {
463 state = {
468 # TODO this assumes revlog storage for changelog.
464 # TODO this assumes revlog storage for changelog.
469 b'expectedversion': self.repo.changelog._format_version,
465 b'expectedversion': self.repo.changelog._format_version,
470 b'skipflags': self.skipflags,
466 b'skipflags': self.skipflags,
471 # experimental config: censor.policy
467 # experimental config: censor.policy
472 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
468 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
473 }
469 }
474
470
475 files = sorted(set(filenodes) | set(filelinkrevs))
471 files = sorted(set(filenodes) | set(filelinkrevs))
476 revisions = 0
472 revisions = 0
477 progress = ui.makeprogress(
473 progress = ui.makeprogress(
478 _(b'checking'), unit=_(b'files'), total=len(files)
474 _(b'checking'), unit=_(b'files'), total=len(files)
479 )
475 )
480 for i, f in enumerate(files):
476 for i, f in enumerate(files):
481 progress.update(i, item=f)
477 progress.update(i, item=f)
482 try:
478 try:
483 linkrevs = filelinkrevs[f]
479 linkrevs = filelinkrevs[f]
484 except KeyError:
480 except KeyError:
485 # in manifest but not in changelog
481 # in manifest but not in changelog
486 linkrevs = []
482 linkrevs = []
487
483
488 if linkrevs:
484 if linkrevs:
489 lr = linkrevs[0]
485 lr = linkrevs[0]
490 else:
486 else:
491 lr = None
487 lr = None
492
488
493 try:
489 try:
494 fl = repo.file(f)
490 fl = repo.file(f)
495 except error.StorageError as e:
491 except error.StorageError as e:
496 self._err(lr, _(b"broken revlog! (%s)") % e, f)
492 self._err(lr, _(b"broken revlog! (%s)") % e, f)
497 continue
493 continue
498
494
499 for ff in fl.files():
495 for ff in fl.files():
500 try:
496 try:
501 storefiles.remove(ff)
497 storefiles.remove(ff)
502 except KeyError:
498 except KeyError:
503 if self.warnorphanstorefiles:
499 if self.warnorphanstorefiles:
504 self._warn(
500 self._warn(
505 _(b" warning: revlog '%s' not in fncache!") % ff
501 _(b" warning: revlog '%s' not in fncache!") % ff
506 )
502 )
507 self.fncachewarned = True
503 self.fncachewarned = True
508
504
509 if not len(fl) and (self.havecl or self.havemf):
505 if not len(fl) and (self.havecl or self.havemf):
510 self._err(lr, _(b"empty or missing %s") % f)
506 self._err(lr, _(b"empty or missing %s") % f)
511 else:
507 else:
512 # Guard against implementations not setting this.
508 # Guard against implementations not setting this.
513 state[b'skipread'] = set()
509 state[b'skipread'] = set()
514 state[b'safe_renamed'] = set()
510 state[b'safe_renamed'] = set()
515
511
516 for problem in fl.verifyintegrity(state):
512 for problem in fl.verifyintegrity(state):
517 if problem.node is not None:
513 if problem.node is not None:
518 linkrev = fl.linkrev(fl.rev(problem.node))
514 linkrev = fl.linkrev(fl.rev(problem.node))
519 else:
515 else:
520 linkrev = None
516 linkrev = None
521
517
522 if problem.warning:
518 if problem.warning:
523 self._warn(problem.warning)
519 self._warn(problem.warning)
524 elif problem.error:
520 elif problem.error:
525 self._err(
521 self._err(
526 linkrev if linkrev is not None else lr,
522 linkrev if linkrev is not None else lr,
527 problem.error,
523 problem.error,
528 f,
524 f,
529 )
525 )
530 else:
526 else:
531 raise error.ProgrammingError(
527 raise error.ProgrammingError(
532 b'problem instance does not set warning or error '
528 b'problem instance does not set warning or error '
533 b'attribute: %s' % problem.msg
529 b'attribute: %s' % problem.msg
534 )
530 )
535
531
536 seen = {}
532 seen = {}
537 for i in fl:
533 for i in fl:
538 revisions += 1
534 revisions += 1
539 n = fl.node(i)
535 n = fl.node(i)
540 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
536 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
541 if f in filenodes:
537 if f in filenodes:
542 if havemf and n not in filenodes[f]:
538 if havemf and n not in filenodes[f]:
543 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
539 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
544 else:
540 else:
545 del filenodes[f][n]
541 del filenodes[f][n]
546
542
547 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
543 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
548 continue
544 continue
549
545
550 # check renames
546 # check renames
551 try:
547 try:
552 # This requires resolving fulltext (at least on revlogs,
548 # This requires resolving fulltext (at least on revlogs,
553 # though not with LFS revisions). We may want
549 # though not with LFS revisions). We may want
554 # ``verifyintegrity()`` to pass a set of nodes with
550 # ``verifyintegrity()`` to pass a set of nodes with
555 # rename metadata as an optimization.
551 # rename metadata as an optimization.
556 rp = fl.renamed(n)
552 rp = fl.renamed(n)
557 if rp:
553 if rp:
558 if lr is not None and ui.verbose:
554 if lr is not None and ui.verbose:
559 ctx = lrugetctx(lr)
555 ctx = lrugetctx(lr)
560 if not any(rp[0] in pctx for pctx in ctx.parents()):
556 if not any(rp[0] in pctx for pctx in ctx.parents()):
561 self._warn(
557 self._warn(
562 _(
558 _(
563 b"warning: copy source of '%s' not"
559 b"warning: copy source of '%s' not"
564 b" in parents of %s"
560 b" in parents of %s"
565 )
561 )
566 % (f, ctx)
562 % (f, ctx)
567 )
563 )
568 fl2 = repo.file(rp[0])
564 fl2 = repo.file(rp[0])
569 if not len(fl2):
565 if not len(fl2):
570 self._err(
566 self._err(
571 lr,
567 lr,
572 _(
568 _(
573 b"empty or missing copy source revlog "
569 b"empty or missing copy source revlog "
574 b"%s:%s"
570 b"%s:%s"
575 )
571 )
576 % (rp[0], short(rp[1])),
572 % (rp[0], short(rp[1])),
577 f,
573 f,
578 )
574 )
579 elif rp[1] == self.repo.nullid:
575 elif rp[1] == self.repo.nullid:
580 ui.note(
576 ui.note(
581 _(
577 _(
582 b"warning: %s@%s: copy source"
578 b"warning: %s@%s: copy source"
583 b" revision is nullid %s:%s\n"
579 b" revision is nullid %s:%s\n"
584 )
580 )
585 % (f, lr, rp[0], short(rp[1]))
581 % (f, lr, rp[0], short(rp[1]))
586 )
582 )
587 else:
583 else:
588 fl2.rev(rp[1])
584 fl2.rev(rp[1])
589 except Exception as inst:
585 except Exception as inst:
590 self._exc(
586 self._exc(
591 lr, _(b"checking rename of %s") % short(n), inst, f
587 lr, _(b"checking rename of %s") % short(n), inst, f
592 )
588 )
593
589
594 # cross-check
590 # cross-check
595 if f in filenodes:
591 if f in filenodes:
596 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
592 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
597 for lr, node in sorted(fns):
593 for lr, node in sorted(fns):
598 self._err(
594 self._err(
599 lr,
595 lr,
600 _(b"manifest refers to unknown revision %s")
596 _(b"manifest refers to unknown revision %s")
601 % short(node),
597 % short(node),
602 f,
598 f,
603 )
599 )
604 progress.complete()
600 progress.complete()
605
601
606 if self.warnorphanstorefiles:
602 if self.warnorphanstorefiles:
607 for f in sorted(storefiles):
603 for f in sorted(storefiles):
608 self._warn(_(b"warning: orphan data file '%s'") % f)
604 self._warn(_(b"warning: orphan data file '%s'") % f)
609
605
610 return len(files), revisions
606 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now