##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48158:041d6515 default
parent child Browse files
Show More
@@ -1,590 +1,590 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
# Verification levels understood by `verifier`.
# VERIFY_DEFAULT is the level used when the caller does not pass one.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) every full manifest text is additionally read
# back from storage while verifying manifests.
VERIFY_FULL = 1
25
25
26
26
def verify(repo, level=None):
    """Run a full verification of ``repo`` while holding its lock.

    ``level`` is handed to ``verifier`` unchanged; the value returned by
    ``verifier.verify()`` is passed back to the caller.
    """
    with repo.lock():
        return verifier(repo, level).verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
# Shown at the end of `verifier.verify()` when at least one revlog was found
# to be missing from the fncache.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error text used by `_verifymanifest` when a sub-directory manifest node is
# referenced by its parent directory but was not found in the sub-directory
# manifest storage.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)

# NOTE(review): presumably used by the copy/rename checks later in this
# file -- the use site is not visible here, confirm.
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)

# NOTE(review): presumably used by the copy/rename checks later in this
# file -- the use site is not visible here, confirm.
WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
53
57
54 class verifier(object):
58 class verifier(object):
def __init__(self, repo, level=None):
    """prepare the state needed to verify ``repo``

    - repo: the repository to check (its unfiltered view is used),
    - level: verification level, VERIFY_DEFAULT when None.
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # issue bookkeeping, updated by _warn() and _err()
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # basic facts about the repository being checked
    changeset_count = len(repo.changelog)
    root_manifest = repo.manifestlog.getstorage(b'')
    self.havecl = changeset_count > 0
    self.havemf = len(root_manifest) > 0
    changelog_version = repo.changelog._format_version
    self.revlogv1 = changelog_version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)

    # flags flipped while the verification runs
    self.refersmf = False
    self.fncachewarned = False

    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
74
78
75 def _warn(self, msg):
79 def _warn(self, msg):
76 """record a "warning" level issue"""
80 """record a "warning" level issue"""
77 self.ui.warn(msg + b"\n")
81 self.ui.warn(msg + b"\n")
78 self.warnings += 1
82 self.warnings += 1
79
83
80 def _err(self, linkrev, msg, filename=None):
84 def _err(self, linkrev, msg, filename=None):
81 """record a "error" level issue"""
85 """record a "error" level issue"""
82 if linkrev is not None:
86 if linkrev is not None:
83 self.badrevs.add(linkrev)
87 self.badrevs.add(linkrev)
84 linkrev = b"%d" % linkrev
88 linkrev = b"%d" % linkrev
85 else:
89 else:
86 linkrev = b'?'
90 linkrev = b'?'
87 msg = b"%s: %s" % (linkrev, msg)
91 msg = b"%s: %s" % (linkrev, msg)
88 if filename:
92 if filename:
89 msg = b"%s@%s" % (filename, msg)
93 msg = b"%s@%s" % (filename, msg)
90 self.ui.warn(b" " + msg + b"\n")
94 self.ui.warn(b" " + msg + b"\n")
91 self.errors += 1
95 self.errors += 1
92
96
def _exc(self, linkrev, msg, inst, filename=None):
    """report an exception caught while verifying as an error

    The exception ``inst`` is rendered as bytes (falling back to its repr
    when that rendering is empty) and appended to ``msg`` before the
    result is handed to `_err`.
    """
    detail = stringutil.forcebytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, detail), filename)
99
103
def _checkrevlog(self, obj, name, linkrev):
    """run the revlog-wide sanity checks on ``obj``

    Checks, in order, that:
    - the revlog is not empty (when the repository has a changelog or a
      manifest); nothing else is checked if it is,
    - `obj.checksize()` reports no data/index size discrepancy,
    - the revlog format (v0 or not) agrees with the changelog's one.

    ``name`` labels the revlog in messages and ``linkrev`` is the revision
    an "empty or missing" error is attached to.
    """
    is_empty = not len(obj)
    if is_empty and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    sizes = obj.checksize()
    data_off = sizes[0]
    index_extra = sizes[1]
    if data_off:
        self._err(None, _(b"data length off by %d bytes") % data_off, name)
    if index_extra:
        msg = _(b"index contains %d extra bytes") % index_extra
        self._err(None, msg, name)

    # only warn when this revlog and the changelog disagree
    uses_v1 = obj._format_version != revlog.REVLOGV0
    if uses_v1 and not self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 1") % name)
    elif not uses_v1 and self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
123
127
124 def _checkentry(self, obj, i, node, seen, linkrevs, f):
128 def _checkentry(self, obj, i, node, seen, linkrevs, f):
125 """verify a single revlog entry
129 """verify a single revlog entry
126
130
127 arguments are:
131 arguments are:
128 - obj: the source revlog
132 - obj: the source revlog
129 - i: the revision number
133 - i: the revision number
130 - node: the revision node id
134 - node: the revision node id
131 - seen: nodes previously seen for this revlog
135 - seen: nodes previously seen for this revlog
132 - linkrevs: [changelog-revisions] introducing "node"
136 - linkrevs: [changelog-revisions] introducing "node"
133 - f: string label ("changelog", "manifest", or filename)
137 - f: string label ("changelog", "manifest", or filename)
134
138
135 Performs the following checks:
139 Performs the following checks:
136 - linkrev points to an existing changelog revision,
140 - linkrev points to an existing changelog revision,
137 - linkrev points to a changelog revision that introduces this revision,
141 - linkrev points to a changelog revision that introduces this revision,
138 - linkrev points to the lowest of these changesets,
142 - linkrev points to the lowest of these changesets,
139 - both parents exist in the revlog,
143 - both parents exist in the revlog,
140 - the revision is not duplicated.
144 - the revision is not duplicated.
141
145
142 Return the linkrev of the revision (or None for changelog's revisions).
146 Return the linkrev of the revision (or None for changelog's revisions).
143 """
147 """
144 lr = obj.linkrev(obj.rev(node))
148 lr = obj.linkrev(obj.rev(node))
145 if lr < 0 or (self.havecl and lr not in linkrevs):
149 if lr < 0 or (self.havecl and lr not in linkrevs):
146 if lr < 0 or lr >= len(self.repo.changelog):
150 if lr < 0 or lr >= len(self.repo.changelog):
147 msg = _(b"rev %d points to nonexistent changeset %d")
151 msg = _(b"rev %d points to nonexistent changeset %d")
148 else:
152 else:
149 msg = _(b"rev %d points to unexpected changeset %d")
153 msg = _(b"rev %d points to unexpected changeset %d")
150 self._err(None, msg % (i, lr), f)
154 self._err(None, msg % (i, lr), f)
151 if linkrevs:
155 if linkrevs:
152 if f and len(linkrevs) > 1:
156 if f and len(linkrevs) > 1:
153 try:
157 try:
154 # attempt to filter down to real linkrevs
158 # attempt to filter down to real linkrevs
155 linkrevs = []
159 linkrevs = []
156 for lr in linkrevs:
160 for lr in linkrevs:
157 if self.lrugetctx(lr)[f].filenode() == node:
161 if self.lrugetctx(lr)[f].filenode() == node:
158 linkrevs.append(lr)
162 linkrevs.append(lr)
159 except Exception:
163 except Exception:
160 pass
164 pass
161 msg = _(b" (expected %s)")
165 msg = _(b" (expected %s)")
162 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
166 msg %= b" ".join(map(pycompat.bytestr, linkrevs))
163 self._warn(msg)
167 self._warn(msg)
164 lr = None # can't be trusted
168 lr = None # can't be trusted
165
169
166 try:
170 try:
167 p1, p2 = obj.parents(node)
171 p1, p2 = obj.parents(node)
168 if p1 not in seen and p1 != self.repo.nullid:
172 if p1 not in seen and p1 != self.repo.nullid:
169 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
173 msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
170 self._err(lr, msg, f)
174 self._err(lr, msg, f)
171 if p2 not in seen and p2 != self.repo.nullid:
175 if p2 not in seen and p2 != self.repo.nullid:
172 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
176 msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
173 self._err(lr, msg, f)
177 self._err(lr, msg, f)
174 except Exception as inst:
178 except Exception as inst:
175 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
179 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
176
180
177 if node in seen:
181 if node in seen:
178 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
182 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
179 seen[node] = i
183 seen[node] = i
180 return lr
184 return lr
181
185
def verify(self):
    """check the whole repository and report what was found

    Runs the changelog, manifest, cross-check and file verifications in
    that order, printing issues as they are discovered, then prints a
    summary.

    Raises error.Abort when the repository url does not start with
    "file:".

    return 1 if any error has been encountered, 0 otherwise."""
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    if os.path.exists(repo.sjoin(b"journal")):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        formatnumber = 1 if self.revlogv1 else 0
        ui.status(_(b"repository uses revlog format %d\n") % formatnumber)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = _(b"checked %d changesets with %d changes to %d files\n")
    ui.status(summary % (len(repo.changelog), filerevisions, totalfiles))
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if not self.errors:
        return 0

    ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        firstbad = _(b"(first damaged changeset appears to be %d)\n")
        ui.warn(firstbad % min(self.badrevs))
    return 1
227
231
def _verifychangelog(self):
    """check the changelog and collect what it references

    Performed checks:
    - everything `_checkrevlog` checks,
    - everything `_checkentry` checks (for every revision),
    - every revision can be read.

    Returns a (mflinkrevs, filelinkrevs) pair built from the changesets:
    - mflinkrevs:   { manifest-node -> [changelog-rev] }
    - filelinkrevs: { file-path -> [changelog-rev] }

    Only files accepted by `self.match` are recorded in filelinkrevs.
    """
    ui = self.ui
    repo = self.repo
    matcher = self.match
    changelog = repo.changelog

    ui.status(_(b"checking changesets\n"))
    manifestrevs = {}
    filerevs = {}
    knownnodes = {}
    self._checkrevlog(changelog, b"changelog", 0)
    topic = _(b'checking')
    unit = _(b'changesets')
    progress = ui.makeprogress(topic, unit=unit, total=len(repo))
    for rev in repo:
        progress.update(rev)
        node = changelog.node(rev)
        self._checkentry(
            changelog, rev, node, knownnodes, [rev], b"changelog"
        )

        try:
            entry = changelog.read(node)
            # entry[0] is the manifest node, entry[3] the touched files
            if entry[0] != self.repo.nullid:
                manifestrevs.setdefault(entry[0], []).append(rev)
                self.refersmf = True
            for path in entry[3]:
                if not matcher(path):
                    continue
                filerevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            self.refersmf = True
            self._exc(rev, _(b"unpacking changeset %s") % short(node), inst)
    progress.complete()
    return manifestrevs, filerevs
275
279
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted from it as the matching nodes are
                      found in storage
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files; must be provided
                      when `dir` is set, it is updated in place
    - subdirprogress: a progress object

    This function checks:
      * all of `_checkrevlog` checks (for all manifest related revlogs)
      * all of `_checkentry` checks (for all manifest related revisions)
      * nodes for subdirectory exists in the sub-directory manifest
      * each manifest entries have a file path
      * each manifest node referred in mflinkrevs exist in the manifest log

    If tree manifest is in use and a matcher is specified, only the
    sub-directories matching it will be verified.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every files in the
    repository. In addition, when tree-manifest is used, it also contains
    sub-directory entries.

    If a matcher is provided, only matching paths will be included.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    # {"subdir/" -> {manifest-node -> [linkrev]}}, fed to the recursive
    # calls below
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        # the files backing this directory manifest are accounted for,
        # so they are no longer orphan candidates
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for i in mf:
        # progress is only reported for the root manifest
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            del mflinkrevs[n]
        elif dir:
            msg = _(b"%s not in parent-directory manifest") % short(n)
            self._err(lr, msg, label)
        else:
            self._err(lr, _(b"%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _(b"entry without name in manifest"))
                elif f == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == b't':
                    # entry flagged b't': recorded as a sub-directory
                    # manifest to verify later
                    if not match.visitdir(fullpath):
                        continue
                    sdn = subdirnodes.setdefault(fullpath + b'/', {})
                    sdn.setdefault(fn, []).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    # keep the first linkrev seen for each filenode
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfdelta = mfl.get(dir, n).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(n)
                self._exc(lr, msg, inst, label)

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
        for c, m in sorted(changesetpairs):
            if dir:
                self._err(c, WARN_PARENT_DIR_UNKNOWN_REV % short(m), label)
            else:
                msg = _(b"changeset refers to unknown revision %s")
                msg %= short(m)
                self._err(c, msg, label)

    if not dir and subdirnodes:
        # root call only: build the set of b'meta/' store files that the
        # recursive calls will whittle down, and the sub-directory
        # progress bar they will share
        self.ui.status(_(b"checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for t, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    # recurse into each sub-directory manifest and merge its file nodes
    # into this level's result
    for subdir, linkrevs in pycompat.iteritems(subdirnodes):
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
            filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        # whatever is left in `storefiles` was claimed by no directory
        # manifest
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
419
423
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """compare the files known to changesets with those known to manifests

    - filelinkrevs: { file-path -> [changelog-rev] } (from the changelog)
    - filenodes:    { file-path -> { filenode -> changelog-rev } } (from
                    the manifests)

    An error is recorded for every path present in only one of the two
    mappings. The first direction is only checked when the repository has
    a manifest, the second only when it has a changelog.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    total = len(filelinkrevs) + len(filenodes)
    topic = _(b'crosschecking')
    unit = _(b'files')
    progress = ui.makeprogress(topic, unit=unit, total=total)

    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            firstrev = filelinkrevs[path][0]
            self._err(firstrev, _(b"in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                filelog = repo.file(path)
                nodes = filenodes[path]
                lowest = min(
                    [filelog.linkrev(filelog.rev(n)) for n in nodes]
                )
            except Exception:
                # the filelog cannot tell us: report without a revision
                lowest = None
            self._err(lowest, _(b"in manifest but not in changeset"), path)

    progress.complete()
448
452
449 def _verifyfiles(self, filenodes, filelinkrevs):
453 def _verifyfiles(self, filenodes, filelinkrevs):
450 repo = self.repo
454 repo = self.repo
451 ui = self.ui
455 ui = self.ui
452 lrugetctx = self.lrugetctx
456 lrugetctx = self.lrugetctx
453 revlogv1 = self.revlogv1
457 revlogv1 = self.revlogv1
454 havemf = self.havemf
458 havemf = self.havemf
455 ui.status(_(b"checking files\n"))
459 ui.status(_(b"checking files\n"))
456
460
457 storefiles = set()
461 storefiles = set()
458 for rl_type, f, f2, size in repo.store.datafiles():
462 for rl_type, f, f2, size in repo.store.datafiles():
459 if not f:
463 if not f:
460 self._err(None, _(b"cannot decode filename '%s'") % f2)
464 self._err(None, _(b"cannot decode filename '%s'") % f2)
461 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
465 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
462 storefiles.add(_normpath(f))
466 storefiles.add(_normpath(f))
463
467
464 state = {
468 state = {
465 # TODO this assumes revlog storage for changelog.
469 # TODO this assumes revlog storage for changelog.
466 b'expectedversion': self.repo.changelog._format_version,
470 b'expectedversion': self.repo.changelog._format_version,
467 b'skipflags': self.skipflags,
471 b'skipflags': self.skipflags,
468 # experimental config: censor.policy
472 # experimental config: censor.policy
469 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
473 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
470 }
474 }
471
475
472 files = sorted(set(filenodes) | set(filelinkrevs))
476 files = sorted(set(filenodes) | set(filelinkrevs))
473 revisions = 0
477 revisions = 0
474 progress = ui.makeprogress(
478 progress = ui.makeprogress(
475 _(b'checking'), unit=_(b'files'), total=len(files)
479 _(b'checking'), unit=_(b'files'), total=len(files)
476 )
480 )
477 for i, f in enumerate(files):
481 for i, f in enumerate(files):
478 progress.update(i, item=f)
482 progress.update(i, item=f)
479 try:
483 try:
480 linkrevs = filelinkrevs[f]
484 linkrevs = filelinkrevs[f]
481 except KeyError:
485 except KeyError:
482 # in manifest but not in changelog
486 # in manifest but not in changelog
483 linkrevs = []
487 linkrevs = []
484
488
485 if linkrevs:
489 if linkrevs:
486 lr = linkrevs[0]
490 lr = linkrevs[0]
487 else:
491 else:
488 lr = None
492 lr = None
489
493
490 try:
494 try:
491 fl = repo.file(f)
495 fl = repo.file(f)
492 except error.StorageError as e:
496 except error.StorageError as e:
493 self._err(lr, _(b"broken revlog! (%s)") % e, f)
497 self._err(lr, _(b"broken revlog! (%s)") % e, f)
494 continue
498 continue
495
499
496 for ff in fl.files():
500 for ff in fl.files():
497 try:
501 try:
498 storefiles.remove(ff)
502 storefiles.remove(ff)
499 except KeyError:
503 except KeyError:
500 if self.warnorphanstorefiles:
504 if self.warnorphanstorefiles:
501 msg = _(b" warning: revlog '%s' not in fncache!")
505 msg = _(b" warning: revlog '%s' not in fncache!")
502 self._warn(msg % ff)
506 self._warn(msg % ff)
503 self.fncachewarned = True
507 self.fncachewarned = True
504
508
505 if not len(fl) and (self.havecl or self.havemf):
509 if not len(fl) and (self.havecl or self.havemf):
506 self._err(lr, _(b"empty or missing %s") % f)
510 self._err(lr, _(b"empty or missing %s") % f)
507 else:
511 else:
508 # Guard against implementations not setting this.
512 # Guard against implementations not setting this.
509 state[b'skipread'] = set()
513 state[b'skipread'] = set()
510 state[b'safe_renamed'] = set()
514 state[b'safe_renamed'] = set()
511
515
512 for problem in fl.verifyintegrity(state):
516 for problem in fl.verifyintegrity(state):
513 if problem.node is not None:
517 if problem.node is not None:
514 linkrev = fl.linkrev(fl.rev(problem.node))
518 linkrev = fl.linkrev(fl.rev(problem.node))
515 else:
519 else:
516 linkrev = None
520 linkrev = None
517
521
518 if problem.warning:
522 if problem.warning:
519 self._warn(problem.warning)
523 self._warn(problem.warning)
520 elif problem.error:
524 elif problem.error:
521 linkrev_msg = linkrev if linkrev is not None else lr
525 linkrev_msg = linkrev if linkrev is not None else lr
522 self._err(linkrev_msg, problem.error, f)
526 self._err(linkrev_msg, problem.error, f)
523 else:
527 else:
524 raise error.ProgrammingError(
528 raise error.ProgrammingError(
525 b'problem instance does not set warning or error '
529 b'problem instance does not set warning or error '
526 b'attribute: %s' % problem.msg
530 b'attribute: %s' % problem.msg
527 )
531 )
528
532
529 seen = {}
533 seen = {}
530 for i in fl:
534 for i in fl:
531 revisions += 1
535 revisions += 1
532 n = fl.node(i)
536 n = fl.node(i)
533 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
537 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
534 if f in filenodes:
538 if f in filenodes:
535 if havemf and n not in filenodes[f]:
539 if havemf and n not in filenodes[f]:
536 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
540 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
537 else:
541 else:
538 del filenodes[f][n]
542 del filenodes[f][n]
539
543
540 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
544 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
541 continue
545 continue
542
546
543 # check renames
547 # check renames
544 try:
548 try:
545 # This requires resolving fulltext (at least on revlogs,
549 # This requires resolving fulltext (at least on revlogs,
546 # though not with LFS revisions). We may want
550 # though not with LFS revisions). We may want
547 # ``verifyintegrity()`` to pass a set of nodes with
551 # ``verifyintegrity()`` to pass a set of nodes with
548 # rename metadata as an optimization.
552 # rename metadata as an optimization.
549 rp = fl.renamed(n)
553 rp = fl.renamed(n)
550 if rp:
554 if rp:
551 if lr is not None and ui.verbose:
555 if lr is not None and ui.verbose:
552 ctx = lrugetctx(lr)
556 ctx = lrugetctx(lr)
553 if not any(rp[0] in pctx for pctx in ctx.parents()):
557 if not any(rp[0] in pctx for pctx in ctx.parents()):
554 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
558 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
555 fl2 = repo.file(rp[0])
559 fl2 = repo.file(rp[0])
556 if not len(fl2):
560 if not len(fl2):
557 m = _(b"empty or missing copy source revlog %s:%s")
561 m = _(b"empty or missing copy source revlog %s:%s")
558 self._err(lr, m % (rp[0], short(rp[1])), f)
562 self._err(lr, m % (rp[0], short(rp[1])), f)
559 elif rp[1] == self.repo.nullid:
563 elif rp[1] == self.repo.nullid:
560 ui.note(
564 msg = WARN_NULLID_COPY_SOURCE
561 _(
565 msg %= (f, lr, rp[0], short(rp[1]))
562 b"warning: %s@%s: copy source"
566 ui.note(msg)
563 b" revision is nullid %s:%s\n"
564 )
565 % (f, lr, rp[0], short(rp[1]))
566 )
567 else:
567 else:
568 fl2.rev(rp[1])
568 fl2.rev(rp[1])
569 except Exception as inst:
569 except Exception as inst:
570 self._exc(
570 self._exc(
571 lr, _(b"checking rename of %s") % short(n), inst, f
571 lr, _(b"checking rename of %s") % short(n), inst, f
572 )
572 )
573
573
574 # cross-check
574 # cross-check
575 if f in filenodes:
575 if f in filenodes:
576 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
576 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
577 for lr, node in sorted(fns):
577 for lr, node in sorted(fns):
578 self._err(
578 self._err(
579 lr,
579 lr,
580 _(b"manifest refers to unknown revision %s")
580 _(b"manifest refers to unknown revision %s")
581 % short(node),
581 % short(node),
582 f,
582 f,
583 )
583 )
584 progress.complete()
584 progress.complete()
585
585
586 if self.warnorphanstorefiles:
586 if self.warnorphanstorefiles:
587 for f in sorted(storefiles):
587 for f in sorted(storefiles):
588 self._warn(_(b"warning: orphan data file '%s'") % f)
588 self._warn(_(b"warning: orphan data file '%s'") % f)
589
589
590 return len(files), revisions
590 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now