verify: use some intermediate variables instead of a multi-liner...
Author: marmoute
Changeset: r48144:0f4beb88 (branch: default)
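The change itself is small: a warning message that was previously built inline, spread across the argument list of a multi-line self._warn(...) call, is now assembled in an intermediate msg variable and passed in a single short call. The snippet below is a minimal, self-contained sketch of that pattern only; it stands in plain bytes and print for Mercurial's _() translation helper and the verifier's _warn() method, so every name in it is illustrative rather than the real API.

    # Sketch of the pattern applied by this changeset; names are illustrative.

    def warn_multiline(warn, linkrevs):
        # before: the format expression spans the argument list of warn()
        warn(
            b" (expected %s)"
            % b" ".join(b"%d" % lr for lr in linkrevs)
        )

    def warn_intermediate(warn, linkrevs):
        # after: build the message first, then make one short call
        msg = b" (expected %s)"
        msg %= b" ".join(b"%d" % lr for lr in linkrevs)
        warn(msg)

    if __name__ == "__main__":
        warn_multiline(print, [1, 2, 3])
        warn_intermediate(print, [1, 2, 3])

Both helpers produce the same message; the second form simply keeps the call site on one line, which is what the diff below does inside verify.py's _checkentry().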
@@ -1,626 +1,625 @@
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
23 VERIFY_DEFAULT = 0
23 VERIFY_DEFAULT = 0
24 VERIFY_FULL = 1
24 VERIFY_FULL = 1
25
25
26
26
27 def verify(repo, level=None):
27 def verify(repo, level=None):
28 with repo.lock():
28 with repo.lock():
29 v = verifier(repo, level)
29 v = verifier(repo, level)
30 return v.verify()
30 return v.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
41 class verifier(object):
41 class verifier(object):
42 def __init__(self, repo, level=None):
42 def __init__(self, repo, level=None):
43 self.repo = repo.unfiltered()
43 self.repo = repo.unfiltered()
44 self.ui = repo.ui
44 self.ui = repo.ui
45 self.match = repo.narrowmatch()
45 self.match = repo.narrowmatch()
46 if level is None:
46 if level is None:
47 level = VERIFY_DEFAULT
47 level = VERIFY_DEFAULT
48 self._level = level
48 self._level = level
49 self.badrevs = set()
49 self.badrevs = set()
50 self.errors = 0
50 self.errors = 0
51 self.warnings = 0
51 self.warnings = 0
52 self.havecl = len(repo.changelog) > 0
52 self.havecl = len(repo.changelog) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
53 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
54 self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
55 self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
56 self.refersmf = False
56 self.refersmf = False
57 self.fncachewarned = False
57 self.fncachewarned = False
58 # developer config: verify.skipflags
58 # developer config: verify.skipflags
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
59 self.skipflags = repo.ui.configint(b'verify', b'skipflags')
60 self.warnorphanstorefiles = True
60 self.warnorphanstorefiles = True
61
61
62 def _warn(self, msg):
62 def _warn(self, msg):
63 """record a "warning" level issue"""
63 """record a "warning" level issue"""
64 self.ui.warn(msg + b"\n")
64 self.ui.warn(msg + b"\n")
65 self.warnings += 1
65 self.warnings += 1
66
66
67 def _err(self, linkrev, msg, filename=None):
67 def _err(self, linkrev, msg, filename=None):
68 """record a "error" level issue"""
68 """record a "error" level issue"""
69 if linkrev is not None:
69 if linkrev is not None:
70 self.badrevs.add(linkrev)
70 self.badrevs.add(linkrev)
71 linkrev = b"%d" % linkrev
71 linkrev = b"%d" % linkrev
72 else:
72 else:
73 linkrev = b'?'
73 linkrev = b'?'
74 msg = b"%s: %s" % (linkrev, msg)
74 msg = b"%s: %s" % (linkrev, msg)
75 if filename:
75 if filename:
76 msg = b"%s@%s" % (filename, msg)
76 msg = b"%s@%s" % (filename, msg)
77 self.ui.warn(b" " + msg + b"\n")
77 self.ui.warn(b" " + msg + b"\n")
78 self.errors += 1
78 self.errors += 1
79
79
80 def _exc(self, linkrev, msg, inst, filename=None):
80 def _exc(self, linkrev, msg, inst, filename=None):
81 """record exception raised during the verify process"""
81 """record exception raised during the verify process"""
82 fmsg = stringutil.forcebytestr(inst)
82 fmsg = stringutil.forcebytestr(inst)
83 if not fmsg:
83 if not fmsg:
84 fmsg = pycompat.byterepr(inst)
84 fmsg = pycompat.byterepr(inst)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
85 self._err(linkrev, b"%s: %s" % (msg, fmsg), filename)
86
86
87 def _checkrevlog(self, obj, name, linkrev):
87 def _checkrevlog(self, obj, name, linkrev):
88 """verify high level property of a revlog
88 """verify high level property of a revlog
89
89
90 - revlog is present,
90 - revlog is present,
91 - revlog is non-empty,
91 - revlog is non-empty,
92 - sizes (index and data) are correct,
92 - sizes (index and data) are correct,
93 - revlog's format version is correct.
93 - revlog's format version is correct.
94 """
94 """
95 if not len(obj) and (self.havecl or self.havemf):
95 if not len(obj) and (self.havecl or self.havemf):
96 self._err(linkrev, _(b"empty or missing %s") % name)
96 self._err(linkrev, _(b"empty or missing %s") % name)
97 return
97 return
98
98
99 d = obj.checksize()
99 d = obj.checksize()
100 if d[0]:
100 if d[0]:
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
101 self._err(None, _(b"data length off by %d bytes") % d[0], name)
102 if d[1]:
102 if d[1]:
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
103 self._err(None, _(b"index contains %d extra bytes") % d[1], name)
104
104
105 if obj._format_version != revlog.REVLOGV0:
105 if obj._format_version != revlog.REVLOGV0:
106 if not self.revlogv1:
106 if not self.revlogv1:
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
107 self._warn(_(b"warning: `%s' uses revlog format 1") % name)
108 elif self.revlogv1:
108 elif self.revlogv1:
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
109 self._warn(_(b"warning: `%s' uses revlog format 0") % name)
110
110
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
111 def _checkentry(self, obj, i, node, seen, linkrevs, f):
112 """verify a single revlog entry
112 """verify a single revlog entry
113
113
114 arguments are:
114 arguments are:
115 - obj: the source revlog
115 - obj: the source revlog
116 - i: the revision number
116 - i: the revision number
117 - node: the revision node id
117 - node: the revision node id
118 - seen: nodes previously seen for this revlog
118 - seen: nodes previously seen for this revlog
119 - linkrevs: [changelog-revisions] introducing "node"
119 - linkrevs: [changelog-revisions] introducing "node"
120 - f: string label ("changelog", "manifest", or filename)
120 - f: string label ("changelog", "manifest", or filename)
121
121
122 Performs the following checks:
122 Performs the following checks:
123 - linkrev points to an existing changelog revision,
123 - linkrev points to an existing changelog revision,
124 - linkrev points to a changelog revision that introduces this revision,
124 - linkrev points to a changelog revision that introduces this revision,
125 - linkrev points to the lowest of these changesets,
125 - linkrev points to the lowest of these changesets,
126 - both parents exist in the revlog,
126 - both parents exist in the revlog,
127 - the revision is not duplicated.
127 - the revision is not duplicated.
128
128
129 Return the linkrev of the revision (or None for changelog's revisions).
129 Return the linkrev of the revision (or None for changelog's revisions).
130 """
130 """
131 lr = obj.linkrev(obj.rev(node))
131 lr = obj.linkrev(obj.rev(node))
132 if lr < 0 or (self.havecl and lr not in linkrevs):
132 if lr < 0 or (self.havecl and lr not in linkrevs):
133 if lr < 0 or lr >= len(self.repo.changelog):
133 if lr < 0 or lr >= len(self.repo.changelog):
134 msg = _(b"rev %d points to nonexistent changeset %d")
134 msg = _(b"rev %d points to nonexistent changeset %d")
135 else:
135 else:
136 msg = _(b"rev %d points to unexpected changeset %d")
136 msg = _(b"rev %d points to unexpected changeset %d")
137 self._err(None, msg % (i, lr), f)
137 self._err(None, msg % (i, lr), f)
138 if linkrevs:
138 if linkrevs:
139 if f and len(linkrevs) > 1:
139 if f and len(linkrevs) > 1:
140 try:
140 try:
141 # attempt to filter down to real linkrevs
141 # attempt to filter down to real linkrevs
142 linkrevs = []
142 linkrevs = []
143 for lr in linkrevs:
143 for lr in linkrevs:
144 if self.lrugetctx(lr)[f].filenode() == node:
144 if self.lrugetctx(lr)[f].filenode() == node:
145 linkrevs.append(lr)
145 linkrevs.append(lr)
146 except Exception:
146 except Exception:
147 pass
147 pass
148 -   self._warn(
149 -       _(b" (expected %s)")
150 -       % b" ".join(map(pycompat.bytestr, linkrevs))
151 -   )
148 +   msg = _(b" (expected %s)")
149 +   msg %= b" ".join(map(pycompat.bytestr, linkrevs))
150 +   self._warn(msg)
152 lr = None # can't be trusted
151 lr = None # can't be trusted
153
152
154 try:
153 try:
155 p1, p2 = obj.parents(node)
154 p1, p2 = obj.parents(node)
156 if p1 not in seen and p1 != self.repo.nullid:
155 if p1 not in seen and p1 != self.repo.nullid:
157 self._err(
156 self._err(
158 lr,
157 lr,
159 _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
158 _(b"unknown parent 1 %s of %s") % (short(p1), short(node)),
160 f,
159 f,
161 )
160 )
162 if p2 not in seen and p2 != self.repo.nullid:
161 if p2 not in seen and p2 != self.repo.nullid:
163 self._err(
162 self._err(
164 lr,
163 lr,
165 _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
164 _(b"unknown parent 2 %s of %s") % (short(p2), short(node)),
166 f,
165 f,
167 )
166 )
168 except Exception as inst:
167 except Exception as inst:
169 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
168 self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)
170
169
171 if node in seen:
170 if node in seen:
172 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
171 self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
173 seen[node] = i
172 seen[node] = i
174 return lr
173 return lr
175
174
176 def verify(self):
175 def verify(self):
177 """verify the content of the Mercurial repository
176 """verify the content of the Mercurial repository
178
177
179 This method run all verifications, displaying issues as they are found.
178 This method run all verifications, displaying issues as they are found.
180
179
181 return 1 if any error have been encountered, 0 otherwise."""
180 return 1 if any error have been encountered, 0 otherwise."""
182 # initial validation and generic report
181 # initial validation and generic report
183 repo = self.repo
182 repo = self.repo
184 ui = repo.ui
183 ui = repo.ui
185 if not repo.url().startswith(b'file:'):
184 if not repo.url().startswith(b'file:'):
186 raise error.Abort(_(b"cannot verify bundle or remote repos"))
185 raise error.Abort(_(b"cannot verify bundle or remote repos"))
187
186
188 if os.path.exists(repo.sjoin(b"journal")):
187 if os.path.exists(repo.sjoin(b"journal")):
189 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
188 ui.warn(_(b"abandoned transaction found - run hg recover\n"))
190
189
191 if ui.verbose or not self.revlogv1:
190 if ui.verbose or not self.revlogv1:
192 ui.status(
191 ui.status(
193 _(b"repository uses revlog format %d\n")
192 _(b"repository uses revlog format %d\n")
194 % (self.revlogv1 and 1 or 0)
193 % (self.revlogv1 and 1 or 0)
195 )
194 )
196
195
197 # data verification
196 # data verification
198 mflinkrevs, filelinkrevs = self._verifychangelog()
197 mflinkrevs, filelinkrevs = self._verifychangelog()
199 filenodes = self._verifymanifest(mflinkrevs)
198 filenodes = self._verifymanifest(mflinkrevs)
200 del mflinkrevs
199 del mflinkrevs
201 self._crosscheckfiles(filelinkrevs, filenodes)
200 self._crosscheckfiles(filelinkrevs, filenodes)
202 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
201 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
203
202
204 # final report
203 # final report
205 ui.status(
204 ui.status(
206 _(b"checked %d changesets with %d changes to %d files\n")
205 _(b"checked %d changesets with %d changes to %d files\n")
207 % (len(repo.changelog), filerevisions, totalfiles)
206 % (len(repo.changelog), filerevisions, totalfiles)
208 )
207 )
209 if self.warnings:
208 if self.warnings:
210 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
209 ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
211 if self.fncachewarned:
210 if self.fncachewarned:
212 ui.warn(
211 ui.warn(
213 _(
212 _(
214 b'hint: run "hg debugrebuildfncache" to recover from '
213 b'hint: run "hg debugrebuildfncache" to recover from '
215 b'corrupt fncache\n'
214 b'corrupt fncache\n'
216 )
215 )
217 )
216 )
218 if self.errors:
217 if self.errors:
219 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
218 ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
220 if self.badrevs:
219 if self.badrevs:
221 ui.warn(
220 ui.warn(
222 _(b"(first damaged changeset appears to be %d)\n")
221 _(b"(first damaged changeset appears to be %d)\n")
223 % min(self.badrevs)
222 % min(self.badrevs)
224 )
223 )
225 return 1
224 return 1
226 return 0
225 return 0
227
226
228 def _verifychangelog(self):
227 def _verifychangelog(self):
229 """verify the changelog of a repository
228 """verify the changelog of a repository
230
229
231 The following checks are performed:
230 The following checks are performed:
232 - all of `_checkrevlog` checks,
231 - all of `_checkrevlog` checks,
233 - all of `_checkentry` checks (for each revisions),
232 - all of `_checkentry` checks (for each revisions),
234 - each revision can be read.
233 - each revision can be read.
235
234
236 The function returns some of the data observed in the changesets as a
235 The function returns some of the data observed in the changesets as a
237 (mflinkrevs, filelinkrevs) tuples:
236 (mflinkrevs, filelinkrevs) tuples:
238 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
237 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
239 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
238 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
240
239
241 If a matcher was specified, filelinkrevs will only contains matched
240 If a matcher was specified, filelinkrevs will only contains matched
242 files.
241 files.
243 """
242 """
244 ui = self.ui
243 ui = self.ui
245 repo = self.repo
244 repo = self.repo
246 match = self.match
245 match = self.match
247 cl = repo.changelog
246 cl = repo.changelog
248
247
249 ui.status(_(b"checking changesets\n"))
248 ui.status(_(b"checking changesets\n"))
250 mflinkrevs = {}
249 mflinkrevs = {}
251 filelinkrevs = {}
250 filelinkrevs = {}
252 seen = {}
251 seen = {}
253 self._checkrevlog(cl, b"changelog", 0)
252 self._checkrevlog(cl, b"changelog", 0)
254 progress = ui.makeprogress(
253 progress = ui.makeprogress(
255 _(b'checking'), unit=_(b'changesets'), total=len(repo)
254 _(b'checking'), unit=_(b'changesets'), total=len(repo)
256 )
255 )
257 for i in repo:
256 for i in repo:
258 progress.update(i)
257 progress.update(i)
259 n = cl.node(i)
258 n = cl.node(i)
260 self._checkentry(cl, i, n, seen, [i], b"changelog")
259 self._checkentry(cl, i, n, seen, [i], b"changelog")
261
260
262 try:
261 try:
263 changes = cl.read(n)
262 changes = cl.read(n)
264 if changes[0] != self.repo.nullid:
263 if changes[0] != self.repo.nullid:
265 mflinkrevs.setdefault(changes[0], []).append(i)
264 mflinkrevs.setdefault(changes[0], []).append(i)
266 self.refersmf = True
265 self.refersmf = True
267 for f in changes[3]:
266 for f in changes[3]:
268 if match(f):
267 if match(f):
269 filelinkrevs.setdefault(_normpath(f), []).append(i)
268 filelinkrevs.setdefault(_normpath(f), []).append(i)
270 except Exception as inst:
269 except Exception as inst:
271 self.refersmf = True
270 self.refersmf = True
272 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
271 self._exc(i, _(b"unpacking changeset %s") % short(n), inst)
273 progress.complete()
272 progress.complete()
274 return mflinkrevs, filelinkrevs
273 return mflinkrevs, filelinkrevs
275
274
276 def _verifymanifest(
275 def _verifymanifest(
277 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
276 self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
278 ):
277 ):
279 """verify the manifestlog content
278 """verify the manifestlog content
280
279
281 Inputs:
280 Inputs:
282 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
281 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
283 - dir: a subdirectory to check (for tree manifest repo)
282 - dir: a subdirectory to check (for tree manifest repo)
284 - storefiles: set of currently "orphan" files.
283 - storefiles: set of currently "orphan" files.
285 - subdirprogress: a progress object
284 - subdirprogress: a progress object
286
285
287 This function checks:
286 This function checks:
288 * all of `_checkrevlog` checks (for all manifest related revlogs)
287 * all of `_checkrevlog` checks (for all manifest related revlogs)
289 * all of `_checkentry` checks (for all manifest related revisions)
288 * all of `_checkentry` checks (for all manifest related revisions)
290 * nodes for subdirectory exists in the sub-directory manifest
289 * nodes for subdirectory exists in the sub-directory manifest
291 * each manifest entries have a file path
290 * each manifest entries have a file path
292 * each manifest node refered in mflinkrevs exist in the manifest log
291 * each manifest node refered in mflinkrevs exist in the manifest log
293
292
294 If tree manifest is in use and a matchers is specified, only the
293 If tree manifest is in use and a matchers is specified, only the
295 sub-directories matching it will be verified.
294 sub-directories matching it will be verified.
296
295
297 return a two level mapping:
296 return a two level mapping:
298 {"path" -> { filenode -> changelog-revision}}
297 {"path" -> { filenode -> changelog-revision}}
299
298
300 This mapping primarily contains entries for every files in the
299 This mapping primarily contains entries for every files in the
301 repository. In addition, when tree-manifest is used, it also contains
300 repository. In addition, when tree-manifest is used, it also contains
302 sub-directory entries.
301 sub-directory entries.
303
302
304 If a matcher is provided, only matching paths will be included.
303 If a matcher is provided, only matching paths will be included.
305 """
304 """
306 repo = self.repo
305 repo = self.repo
307 ui = self.ui
306 ui = self.ui
308 match = self.match
307 match = self.match
309 mfl = self.repo.manifestlog
308 mfl = self.repo.manifestlog
310 mf = mfl.getstorage(dir)
309 mf = mfl.getstorage(dir)
311
310
312 if not dir:
311 if not dir:
313 self.ui.status(_(b"checking manifests\n"))
312 self.ui.status(_(b"checking manifests\n"))
314
313
315 filenodes = {}
314 filenodes = {}
316 subdirnodes = {}
315 subdirnodes = {}
317 seen = {}
316 seen = {}
318 label = b"manifest"
317 label = b"manifest"
319 if dir:
318 if dir:
320 label = dir
319 label = dir
321 revlogfiles = mf.files()
320 revlogfiles = mf.files()
322 storefiles.difference_update(revlogfiles)
321 storefiles.difference_update(revlogfiles)
323 if subdirprogress: # should be true since we're in a subdirectory
322 if subdirprogress: # should be true since we're in a subdirectory
324 subdirprogress.increment()
323 subdirprogress.increment()
325 if self.refersmf:
324 if self.refersmf:
326 # Do not check manifest if there are only changelog entries with
325 # Do not check manifest if there are only changelog entries with
327 # null manifests.
326 # null manifests.
328 self._checkrevlog(mf._revlog, label, 0)
327 self._checkrevlog(mf._revlog, label, 0)
329 progress = ui.makeprogress(
328 progress = ui.makeprogress(
330 _(b'checking'), unit=_(b'manifests'), total=len(mf)
329 _(b'checking'), unit=_(b'manifests'), total=len(mf)
331 )
330 )
332 for i in mf:
331 for i in mf:
333 if not dir:
332 if not dir:
334 progress.update(i)
333 progress.update(i)
335 n = mf.node(i)
334 n = mf.node(i)
336 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
335 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
337 if n in mflinkrevs:
336 if n in mflinkrevs:
338 del mflinkrevs[n]
337 del mflinkrevs[n]
339 elif dir:
338 elif dir:
340 self._err(
339 self._err(
341 lr,
340 lr,
342 _(b"%s not in parent-directory manifest") % short(n),
341 _(b"%s not in parent-directory manifest") % short(n),
343 label,
342 label,
344 )
343 )
345 else:
344 else:
346 self._err(lr, _(b"%s not in changesets") % short(n), label)
345 self._err(lr, _(b"%s not in changesets") % short(n), label)
347
346
348 try:
347 try:
349 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
348 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
350 for f, fn, fl in mfdelta.iterentries():
349 for f, fn, fl in mfdelta.iterentries():
351 if not f:
350 if not f:
352 self._err(lr, _(b"entry without name in manifest"))
351 self._err(lr, _(b"entry without name in manifest"))
353 elif f == b"/dev/null": # ignore this in very old repos
352 elif f == b"/dev/null": # ignore this in very old repos
354 continue
353 continue
355 fullpath = dir + _normpath(f)
354 fullpath = dir + _normpath(f)
356 if fl == b't':
355 if fl == b't':
357 if not match.visitdir(fullpath):
356 if not match.visitdir(fullpath):
358 continue
357 continue
359 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
358 subdirnodes.setdefault(fullpath + b'/', {}).setdefault(
360 fn, []
359 fn, []
361 ).append(lr)
360 ).append(lr)
362 else:
361 else:
363 if not match(fullpath):
362 if not match(fullpath):
364 continue
363 continue
365 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
364 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
366 except Exception as inst:
365 except Exception as inst:
367 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
366 self._exc(lr, _(b"reading delta %s") % short(n), inst, label)
368 if self._level >= VERIFY_FULL:
367 if self._level >= VERIFY_FULL:
369 try:
368 try:
370 # Various issues can affect manifest. So we read each full
369 # Various issues can affect manifest. So we read each full
371 # text from storage. This triggers the checks from the core
370 # text from storage. This triggers the checks from the core
372 # code (eg: hash verification, filename are ordered, etc.)
371 # code (eg: hash verification, filename are ordered, etc.)
373 mfdelta = mfl.get(dir, n).read()
372 mfdelta = mfl.get(dir, n).read()
374 except Exception as inst:
373 except Exception as inst:
375 self._exc(
374 self._exc(
376 lr,
375 lr,
377 _(b"reading full manifest %s") % short(n),
376 _(b"reading full manifest %s") % short(n),
378 inst,
377 inst,
379 label,
378 label,
380 )
379 )
381
380
382 if not dir:
381 if not dir:
383 progress.complete()
382 progress.complete()
384
383
385 if self.havemf:
384 if self.havemf:
386 # since we delete entry in `mflinkrevs` during iteration, any
385 # since we delete entry in `mflinkrevs` during iteration, any
387 # remaining entries are "missing". We need to issue errors for them.
386 # remaining entries are "missing". We need to issue errors for them.
388 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
387 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
389 for c, m in sorted(changesetpairs):
388 for c, m in sorted(changesetpairs):
390 if dir:
389 if dir:
391 self._err(
390 self._err(
392 c,
391 c,
393 _(
392 _(
394 b"parent-directory manifest refers to unknown"
393 b"parent-directory manifest refers to unknown"
395 b" revision %s"
394 b" revision %s"
396 )
395 )
397 % short(m),
396 % short(m),
398 label,
397 label,
399 )
398 )
400 else:
399 else:
401 self._err(
400 self._err(
402 c,
401 c,
403 _(b"changeset refers to unknown revision %s")
402 _(b"changeset refers to unknown revision %s")
404 % short(m),
403 % short(m),
405 label,
404 label,
406 )
405 )
407
406
408 if not dir and subdirnodes:
407 if not dir and subdirnodes:
409 self.ui.status(_(b"checking directory manifests\n"))
408 self.ui.status(_(b"checking directory manifests\n"))
410 storefiles = set()
409 storefiles = set()
411 subdirs = set()
410 subdirs = set()
412 revlogv1 = self.revlogv1
411 revlogv1 = self.revlogv1
413 for t, f, f2, size in repo.store.datafiles():
412 for t, f, f2, size in repo.store.datafiles():
414 if not f:
413 if not f:
415 self._err(None, _(b"cannot decode filename '%s'") % f2)
414 self._err(None, _(b"cannot decode filename '%s'") % f2)
416 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
415 elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
417 storefiles.add(_normpath(f))
416 storefiles.add(_normpath(f))
418 subdirs.add(os.path.dirname(f))
417 subdirs.add(os.path.dirname(f))
419 subdirprogress = ui.makeprogress(
418 subdirprogress = ui.makeprogress(
420 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
419 _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
421 )
420 )
422
421
423 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
422 for subdir, linkrevs in pycompat.iteritems(subdirnodes):
424 subdirfilenodes = self._verifymanifest(
423 subdirfilenodes = self._verifymanifest(
425 linkrevs, subdir, storefiles, subdirprogress
424 linkrevs, subdir, storefiles, subdirprogress
426 )
425 )
427 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
426 for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
428 filenodes.setdefault(f, {}).update(onefilenodes)
427 filenodes.setdefault(f, {}).update(onefilenodes)
429
428
430 if not dir and subdirnodes:
429 if not dir and subdirnodes:
431 assert subdirprogress is not None # help pytype
430 assert subdirprogress is not None # help pytype
432 subdirprogress.complete()
431 subdirprogress.complete()
433 if self.warnorphanstorefiles:
432 if self.warnorphanstorefiles:
434 for f in sorted(storefiles):
433 for f in sorted(storefiles):
435 self._warn(_(b"warning: orphan data file '%s'") % f)
434 self._warn(_(b"warning: orphan data file '%s'") % f)
436
435
437 return filenodes
436 return filenodes
438
437
439 def _crosscheckfiles(self, filelinkrevs, filenodes):
438 def _crosscheckfiles(self, filelinkrevs, filenodes):
440 repo = self.repo
439 repo = self.repo
441 ui = self.ui
440 ui = self.ui
442 ui.status(_(b"crosschecking files in changesets and manifests\n"))
441 ui.status(_(b"crosschecking files in changesets and manifests\n"))
443
442
444 total = len(filelinkrevs) + len(filenodes)
443 total = len(filelinkrevs) + len(filenodes)
445 progress = ui.makeprogress(
444 progress = ui.makeprogress(
446 _(b'crosschecking'), unit=_(b'files'), total=total
445 _(b'crosschecking'), unit=_(b'files'), total=total
447 )
446 )
448 if self.havemf:
447 if self.havemf:
449 for f in sorted(filelinkrevs):
448 for f in sorted(filelinkrevs):
450 progress.increment()
449 progress.increment()
451 if f not in filenodes:
450 if f not in filenodes:
452 lr = filelinkrevs[f][0]
451 lr = filelinkrevs[f][0]
453 self._err(lr, _(b"in changeset but not in manifest"), f)
452 self._err(lr, _(b"in changeset but not in manifest"), f)
454
453
455 if self.havecl:
454 if self.havecl:
456 for f in sorted(filenodes):
455 for f in sorted(filenodes):
457 progress.increment()
456 progress.increment()
458 if f not in filelinkrevs:
457 if f not in filelinkrevs:
459 try:
458 try:
460 fl = repo.file(f)
459 fl = repo.file(f)
461 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
460 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
462 except Exception:
461 except Exception:
463 lr = None
462 lr = None
464 self._err(lr, _(b"in manifest but not in changeset"), f)
463 self._err(lr, _(b"in manifest but not in changeset"), f)
465
464
466 progress.complete()
465 progress.complete()
467
466
468 def _verifyfiles(self, filenodes, filelinkrevs):
467 def _verifyfiles(self, filenodes, filelinkrevs):
469 repo = self.repo
468 repo = self.repo
470 ui = self.ui
469 ui = self.ui
471 lrugetctx = self.lrugetctx
470 lrugetctx = self.lrugetctx
472 revlogv1 = self.revlogv1
471 revlogv1 = self.revlogv1
473 havemf = self.havemf
472 havemf = self.havemf
474 ui.status(_(b"checking files\n"))
473 ui.status(_(b"checking files\n"))
475
474
476 storefiles = set()
475 storefiles = set()
477 for rl_type, f, f2, size in repo.store.datafiles():
476 for rl_type, f, f2, size in repo.store.datafiles():
478 if not f:
477 if not f:
479 self._err(None, _(b"cannot decode filename '%s'") % f2)
478 self._err(None, _(b"cannot decode filename '%s'") % f2)
480 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
479 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
481 storefiles.add(_normpath(f))
480 storefiles.add(_normpath(f))
482
481
483 state = {
482 state = {
484 # TODO this assumes revlog storage for changelog.
483 # TODO this assumes revlog storage for changelog.
485 b'expectedversion': self.repo.changelog._format_version,
484 b'expectedversion': self.repo.changelog._format_version,
486 b'skipflags': self.skipflags,
485 b'skipflags': self.skipflags,
487 # experimental config: censor.policy
486 # experimental config: censor.policy
488 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
487 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
489 }
488 }
490
489
491 files = sorted(set(filenodes) | set(filelinkrevs))
490 files = sorted(set(filenodes) | set(filelinkrevs))
492 revisions = 0
491 revisions = 0
493 progress = ui.makeprogress(
492 progress = ui.makeprogress(
494 _(b'checking'), unit=_(b'files'), total=len(files)
493 _(b'checking'), unit=_(b'files'), total=len(files)
495 )
494 )
496 for i, f in enumerate(files):
495 for i, f in enumerate(files):
497 progress.update(i, item=f)
496 progress.update(i, item=f)
498 try:
497 try:
499 linkrevs = filelinkrevs[f]
498 linkrevs = filelinkrevs[f]
500 except KeyError:
499 except KeyError:
501 # in manifest but not in changelog
500 # in manifest but not in changelog
502 linkrevs = []
501 linkrevs = []
503
502
504 if linkrevs:
503 if linkrevs:
505 lr = linkrevs[0]
504 lr = linkrevs[0]
506 else:
505 else:
507 lr = None
506 lr = None
508
507
509 try:
508 try:
510 fl = repo.file(f)
509 fl = repo.file(f)
511 except error.StorageError as e:
510 except error.StorageError as e:
512 self._err(lr, _(b"broken revlog! (%s)") % e, f)
511 self._err(lr, _(b"broken revlog! (%s)") % e, f)
513 continue
512 continue
514
513
515 for ff in fl.files():
514 for ff in fl.files():
516 try:
515 try:
517 storefiles.remove(ff)
516 storefiles.remove(ff)
518 except KeyError:
517 except KeyError:
519 if self.warnorphanstorefiles:
518 if self.warnorphanstorefiles:
520 self._warn(
519 self._warn(
521 _(b" warning: revlog '%s' not in fncache!") % ff
520 _(b" warning: revlog '%s' not in fncache!") % ff
522 )
521 )
523 self.fncachewarned = True
522 self.fncachewarned = True
524
523
525 if not len(fl) and (self.havecl or self.havemf):
524 if not len(fl) and (self.havecl or self.havemf):
526 self._err(lr, _(b"empty or missing %s") % f)
525 self._err(lr, _(b"empty or missing %s") % f)
527 else:
526 else:
528 # Guard against implementations not setting this.
527 # Guard against implementations not setting this.
529 state[b'skipread'] = set()
528 state[b'skipread'] = set()
530 state[b'safe_renamed'] = set()
529 state[b'safe_renamed'] = set()
531
530
532 for problem in fl.verifyintegrity(state):
531 for problem in fl.verifyintegrity(state):
533 if problem.node is not None:
532 if problem.node is not None:
534 linkrev = fl.linkrev(fl.rev(problem.node))
533 linkrev = fl.linkrev(fl.rev(problem.node))
535 else:
534 else:
536 linkrev = None
535 linkrev = None
537
536
538 if problem.warning:
537 if problem.warning:
539 self._warn(problem.warning)
538 self._warn(problem.warning)
540 elif problem.error:
539 elif problem.error:
541 self._err(
540 self._err(
542 linkrev if linkrev is not None else lr,
541 linkrev if linkrev is not None else lr,
543 problem.error,
542 problem.error,
544 f,
543 f,
545 )
544 )
546 else:
545 else:
547 raise error.ProgrammingError(
546 raise error.ProgrammingError(
548 b'problem instance does not set warning or error '
547 b'problem instance does not set warning or error '
549 b'attribute: %s' % problem.msg
548 b'attribute: %s' % problem.msg
550 )
549 )
551
550
552 seen = {}
551 seen = {}
553 for i in fl:
552 for i in fl:
554 revisions += 1
553 revisions += 1
555 n = fl.node(i)
554 n = fl.node(i)
556 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
555 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
557 if f in filenodes:
556 if f in filenodes:
558 if havemf and n not in filenodes[f]:
557 if havemf and n not in filenodes[f]:
559 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
558 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
560 else:
559 else:
561 del filenodes[f][n]
560 del filenodes[f][n]
562
561
563 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
562 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
564 continue
563 continue
565
564
566 # check renames
565 # check renames
567 try:
566 try:
568 # This requires resolving fulltext (at least on revlogs,
567 # This requires resolving fulltext (at least on revlogs,
569 # though not with LFS revisions). We may want
568 # though not with LFS revisions). We may want
570 # ``verifyintegrity()`` to pass a set of nodes with
569 # ``verifyintegrity()`` to pass a set of nodes with
571 # rename metadata as an optimization.
570 # rename metadata as an optimization.
572 rp = fl.renamed(n)
571 rp = fl.renamed(n)
573 if rp:
572 if rp:
574 if lr is not None and ui.verbose:
573 if lr is not None and ui.verbose:
575 ctx = lrugetctx(lr)
574 ctx = lrugetctx(lr)
576 if not any(rp[0] in pctx for pctx in ctx.parents()):
575 if not any(rp[0] in pctx for pctx in ctx.parents()):
577 self._warn(
576 self._warn(
578 _(
577 _(
579 b"warning: copy source of '%s' not"
578 b"warning: copy source of '%s' not"
580 b" in parents of %s"
579 b" in parents of %s"
581 )
580 )
582 % (f, ctx)
581 % (f, ctx)
583 )
582 )
584 fl2 = repo.file(rp[0])
583 fl2 = repo.file(rp[0])
585 if not len(fl2):
584 if not len(fl2):
586 self._err(
585 self._err(
587 lr,
586 lr,
588 _(
587 _(
589 b"empty or missing copy source revlog "
588 b"empty or missing copy source revlog "
590 b"%s:%s"
589 b"%s:%s"
591 )
590 )
592 % (rp[0], short(rp[1])),
591 % (rp[0], short(rp[1])),
593 f,
592 f,
594 )
593 )
595 elif rp[1] == self.repo.nullid:
594 elif rp[1] == self.repo.nullid:
596 ui.note(
595 ui.note(
597 _(
596 _(
598 b"warning: %s@%s: copy source"
597 b"warning: %s@%s: copy source"
599 b" revision is nullid %s:%s\n"
598 b" revision is nullid %s:%s\n"
600 )
599 )
601 % (f, lr, rp[0], short(rp[1]))
600 % (f, lr, rp[0], short(rp[1]))
602 )
601 )
603 else:
602 else:
604 fl2.rev(rp[1])
603 fl2.rev(rp[1])
605 except Exception as inst:
604 except Exception as inst:
606 self._exc(
605 self._exc(
607 lr, _(b"checking rename of %s") % short(n), inst, f
606 lr, _(b"checking rename of %s") % short(n), inst, f
608 )
607 )
609
608
610 # cross-check
609 # cross-check
611 if f in filenodes:
610 if f in filenodes:
612 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
611 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
613 for lr, node in sorted(fns):
612 for lr, node in sorted(fns):
614 self._err(
613 self._err(
615 lr,
614 lr,
616 _(b"manifest refers to unknown revision %s")
615 _(b"manifest refers to unknown revision %s")
617 % short(node),
616 % short(node),
618 f,
617 f,
619 )
618 )
620 progress.complete()
619 progress.complete()
621
620
622 if self.warnorphanstorefiles:
621 if self.warnorphanstorefiles:
623 for f in sorted(storefiles):
622 for f in sorted(storefiles):
624 self._warn(_(b"warning: orphan data file '%s'") % f)
623 self._warn(_(b"warning: orphan data file '%s'") % f)
625
624
626 return len(files), revisions
625 return len(files), revisions
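For reference, the module-level verify() function kept at the top of this file is the entry point that hg verify ultimately reaches: it takes the store lock, builds a verifier, and returns 1 if any integrity error was recorded, 0 otherwise. The sketch below shows one way to drive it from Python; it relies on Mercurial's internal API (which is not a stable interface), and the repository path is a placeholder you would replace.

    # Rough sketch: run the verifier on a local repository via the internal API.
    from mercurial import hg, verify
    from mercurial import ui as uimod

    ui = uimod.ui.load()                        # ui configured from hgrc files
    repo = hg.repository(ui, b'/path/to/repo')  # placeholder path

    # VERIFY_FULL also re-reads every full manifest text (see _verifymanifest);
    # the default level (VERIFY_DEFAULT) skips that extra pass.
    rc = verify.verify(repo, level=verify.VERIFY_FULL)
    print('integrity errors found' if rc else 'repository looks clean')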