##// END OF EJS Templates
verify: use some intermediate variables instead of a multi-liner...
marmoute -
r48161:ab5fd39c default
parent child Browse files
Show More
@@ -1,590 +1,586 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import short
13 from .node import short
14 from .utils import stringutil
14 from .utils import stringutil
15
15
16 from . import (
16 from . import (
17 error,
17 error,
18 pycompat,
18 pycompat,
19 revlog,
19 revlog,
20 util,
20 util,
21 )
21 )
22
22
# Verification levels. ``verifier`` falls back to VERIFY_DEFAULT when no
# level is given; VERIFY_FULL additionally reads every full manifest text.
VERIFY_DEFAULT = 0
VERIFY_FULL = 1
25
25
26
26
def verify(repo, level=None):
    """Run a full verification of ``repo`` while holding the repository lock.

    ``level`` is handed to ``verifier`` untouched. The return value is
    whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        checker = verifier(repo, level)
        return checker.verify()
31
31
32
32
33 def _normpath(f):
33 def _normpath(f):
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
34 # under hg < 2.4, convert didn't sanitize paths properly, so a
35 # converted repo may contain repeated slashes
35 # converted repo may contain repeated slashes
36 while b'//' in f:
36 while b'//' in f:
37 f = f.replace(b'//', b'/')
37 f = f.replace(b'//', b'/')
38 return f
38 return f
39
39
40
40
# Hint printed at the end of a run when a revlog was missing from fncache.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error text for a sub-directory manifest node that a parent directory
# references but the directory's manifest storage does not contain.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)

# Copy-tracing messages.
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)

WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
56
56
57
57
58 class verifier(object):
58 class verifier(object):
def __init__(self, repo, level=None):
    """prepare the state needed to verify ``repo``

    ``level`` selects how thorough the verification is; ``None`` means
    ``VERIFY_DEFAULT``.
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # issue bookkeeping, updated by _warn() and _err()
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # what the repository contains, and in which format
    self.havecl = len(repo.changelog) != 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) != 0
    self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0

    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)
    self.refersmf = False
    self.fncachewarned = False
    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
78
78
79 def _warn(self, msg):
79 def _warn(self, msg):
80 """record a "warning" level issue"""
80 """record a "warning" level issue"""
81 self.ui.warn(msg + b"\n")
81 self.ui.warn(msg + b"\n")
82 self.warnings += 1
82 self.warnings += 1
83
83
84 def _err(self, linkrev, msg, filename=None):
84 def _err(self, linkrev, msg, filename=None):
85 """record a "error" level issue"""
85 """record a "error" level issue"""
86 if linkrev is not None:
86 if linkrev is not None:
87 self.badrevs.add(linkrev)
87 self.badrevs.add(linkrev)
88 linkrev = b"%d" % linkrev
88 linkrev = b"%d" % linkrev
89 else:
89 else:
90 linkrev = b'?'
90 linkrev = b'?'
91 msg = b"%s: %s" % (linkrev, msg)
91 msg = b"%s: %s" % (linkrev, msg)
92 if filename:
92 if filename:
93 msg = b"%s@%s" % (filename, msg)
93 msg = b"%s@%s" % (filename, msg)
94 self.ui.warn(b" " + msg + b"\n")
94 self.ui.warn(b" " + msg + b"\n")
95 self.errors += 1
95 self.errors += 1
96
96
def _exc(self, linkrev, msg, inst, filename=None):
    """report the exception ``inst`` as an error, prefixed by ``msg``

    The exception is rendered with ``stringutil.forcebytestr``; when that
    gives an empty result, its ``pycompat.byterepr`` is used instead.
    """
    detail = stringutil.forcebytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, detail), filename)
103
103
def _checkrevlog(self, obj, name, linkrev):
    """run the revlog-wide sanity checks on ``obj``

    Checked, in order:
    - the revlog holds at least one revision (when the repository has a
      changelog or a manifest),
    - ``obj.checksize()`` reports no data or index size mismatch,
    - the revlog format (v0 or not) agrees with the changelog's.

    ``name`` labels the revlog in messages; ``linkrev`` is only used for
    the "empty or missing" error.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    sizes = obj.checksize()
    data_off = sizes[0]
    if data_off:
        self._err(None, _(b"data length off by %d bytes") % data_off, name)
    index_extra = sizes[1]
    if index_extra:
        self._err(
            None, _(b"index contains %d extra bytes") % index_extra, name
        )

    # warn when this revlog and the changelog disagree about the format
    is_v0 = obj._format_version == revlog.REVLOGV0
    if is_v0 and self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
    elif not is_v0 and not self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 1") % name)
127
127
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     nodes previously seen for this revlog ({node -> rev});
                updated in place with ``node``
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev points to a changelog revision that introduces this revision,
    - both parents exist in the revlog,
    - the revision is not duplicated.

    Return the linkrev stored in the revlog for this revision, or None
    when that linkrev was found to be invalid.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    #
                    # Build the filtered list under a separate name and
                    # with a separate loop variable: the candidates must
                    # still be available while we iterate over them, and
                    # `lr` must not be clobbered.
                    real_linkrevs = []
                    for candidate in linkrevs:
                        fctx = self.lrugetctx(candidate)[f]
                        if fctx.filenode() == node:
                            real_linkrevs.append(candidate)
                    linkrevs = real_linkrevs
                except Exception:
                    # best effort only: on failure, keep reporting the
                    # unfiltered candidates
                    pass
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
185
185
def verify(self):
    """verify the content of the Mercurial repository

    Runs the changelog, manifest, cross-check and file verifications in
    that order, reporting issues as they are found, then prints a summary.

    Raises ``error.Abort`` when the repository url does not start with
    ``file:``.

    return 1 if any error have been encountered, 0 otherwise."""
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui
    if not repo.url().startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    journal = repo.sjoin(b"journal")
    if os.path.exists(journal):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        version = 1 if self.revlogv1 else 0
        ui.status(_(b"repository uses revlog format %d\n") % version)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = _(b"checked %d changesets with %d changes to %d files\n")
    ui.status(summary % (len(repo.changelog), filerevisions, totalfiles))
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if not self.errors:
        return 0

    ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        msg = _(b"(first damaged changeset appears to be %d)\n")
        ui.warn(msg % min(self.badrevs))
    return 1
231
231
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revision),
    - each revision can be read.

    Returns a (mflinkrevs, filelinkrevs) tuple built from the changesets
    that could be read:
    - mflinkrevs:   a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: a { file-path -> [changelog-rev] } mapping

    Only files accepted by ``self.match`` are recorded in filelinkrevs.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    for rev in repo:
        progress.update(rev)
        node = cl.node(rev)
        self._checkentry(cl, rev, node, seen, [rev], b"changelog")

        try:
            changes = cl.read(node)
            mfnode = changes[0]
            if mfnode != self.repo.nullid:
                mflinkrevs.setdefault(mfnode, []).append(rev)
                self.refersmf = True
            for path in changes[3]:
                if not match(path):
                    continue
                filelinkrevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            # an unreadable changeset may still refer to a manifest
            self.refersmf = True
            self._exc(rev, _(b"unpacking changeset %s") % short(node), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
279
279
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted from it as nodes are found
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectory exists in the sub-directory manifest
    * each manifest entries have a file path
    * each manifest node referred in mflinkrevs exist in the manifest log

    Sub-directories rejected by ``self.match.visitdir`` and files rejected
    by ``self.match`` are skipped.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    It holds the file entries found in this manifest, merged with those
    returned by the recursive calls made for each sub-directory.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = repo.manifestlog
    store = mfl.getstorage(dir)

    if not dir:
        ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = b"manifest"
    if dir:
        label = dir
        # the revlog files of this directory are accounted for
        storefiles.difference_update(store.files())
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(store._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(store)
    )
    for rev in store:
        if not dir:
            progress.update(rev)
        node = store.node(rev)
        expected = mflinkrevs.get(node, [])
        lr = self._checkentry(store, rev, node, seen, expected, label)
        if node in mflinkrevs:
            del mflinkrevs[node]
        elif dir:
            msg = _(b"%s not in parent-directory manifest") % short(node)
            self._err(lr, msg, label)
        else:
            self._err(lr, _(b"%s not in changesets") % short(node), label)

        try:
            mfdelta = mfl.get(dir, node).readdelta(shallow=True)
            for name, entrynode, flag in mfdelta.iterentries():
                if not name:
                    self._err(lr, _(b"entry without name in manifest"))
                elif name == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(name)
                if flag == b't':
                    # sub-directory entry: queue it for the recursive pass
                    if match.visitdir(fullpath):
                        sdn = subdirnodes.setdefault(fullpath + b'/', {})
                        sdn.setdefault(entrynode, []).append(lr)
                elif match(fullpath):
                    filenodes.setdefault(fullpath, {}).setdefault(
                        entrynode, lr
                    )
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(node), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfl.get(dir, node).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(node)
                self._exc(lr, msg, inst, label)

    if not dir:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        changesetpairs = [
            (c, m) for m in mflinkrevs for c in mflinkrevs[m]
        ]
        for c, m in sorted(changesetpairs):
            if dir:
                msg = WARN_PARENT_DIR_UNKNOWN_REV % short(m)
            else:
                msg = _(b"changeset refers to unknown revision %s")
                msg %= short(m)
            self._err(c, msg, label)

    # only the top-level call sets up (and later reports on) the store
    # files and progress shared by the recursive calls
    toplevel_with_subdirs = not dir and bool(subdirnodes)

    if toplevel_with_subdirs:
        ui.status(_(b"checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for t, f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _(b"cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith(b'meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in pycompat.iteritems(subdirnodes):
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for f, onefilenodes in pycompat.iteritems(subdirfilenodes):
            filenodes.setdefault(f, {}).update(onefilenodes)

    if toplevel_with_subdirs:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % f)

    return filenodes
423
423
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """check that changesets and manifests mention the same files

    - filelinkrevs: a { file-path -> [changelog-rev] } mapping
    - filenodes:    a { file-path -> { filenode -> changelog-rev } } mapping

    A file present in only one of the two mappings is reported as an
    error. Each direction is skipped when the repository has no manifest
    (resp. no changelog).
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    progress = ui.makeprogress(
        _(b'crosschecking'),
        unit=_(b'files'),
        total=len(filelinkrevs) + len(filenodes),
    )

    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            first = filelinkrevs[path][0]
            self._err(first, _(b"in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                # blame the earliest changeset linked to any of the nodes
                fl = repo.file(path)
                lr = min(fl.linkrev(fl.rev(n)) for n in filenodes[path])
            except Exception:
                lr = None
            self._err(lr, _(b"in manifest but not in changeset"), path)

    progress.complete()
452
452
453 def _verifyfiles(self, filenodes, filelinkrevs):
453 def _verifyfiles(self, filenodes, filelinkrevs):
454 repo = self.repo
454 repo = self.repo
455 ui = self.ui
455 ui = self.ui
456 lrugetctx = self.lrugetctx
456 lrugetctx = self.lrugetctx
457 revlogv1 = self.revlogv1
457 revlogv1 = self.revlogv1
458 havemf = self.havemf
458 havemf = self.havemf
459 ui.status(_(b"checking files\n"))
459 ui.status(_(b"checking files\n"))
460
460
461 storefiles = set()
461 storefiles = set()
462 for rl_type, f, f2, size in repo.store.datafiles():
462 for rl_type, f, f2, size in repo.store.datafiles():
463 if not f:
463 if not f:
464 self._err(None, _(b"cannot decode filename '%s'") % f2)
464 self._err(None, _(b"cannot decode filename '%s'") % f2)
465 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
465 elif (size > 0 or not revlogv1) and f.startswith(b'data/'):
466 storefiles.add(_normpath(f))
466 storefiles.add(_normpath(f))
467
467
468 state = {
468 state = {
469 # TODO this assumes revlog storage for changelog.
469 # TODO this assumes revlog storage for changelog.
470 b'expectedversion': self.repo.changelog._format_version,
470 b'expectedversion': self.repo.changelog._format_version,
471 b'skipflags': self.skipflags,
471 b'skipflags': self.skipflags,
472 # experimental config: censor.policy
472 # experimental config: censor.policy
473 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
473 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
474 }
474 }
475
475
476 files = sorted(set(filenodes) | set(filelinkrevs))
476 files = sorted(set(filenodes) | set(filelinkrevs))
477 revisions = 0
477 revisions = 0
478 progress = ui.makeprogress(
478 progress = ui.makeprogress(
479 _(b'checking'), unit=_(b'files'), total=len(files)
479 _(b'checking'), unit=_(b'files'), total=len(files)
480 )
480 )
481 for i, f in enumerate(files):
481 for i, f in enumerate(files):
482 progress.update(i, item=f)
482 progress.update(i, item=f)
483 try:
483 try:
484 linkrevs = filelinkrevs[f]
484 linkrevs = filelinkrevs[f]
485 except KeyError:
485 except KeyError:
486 # in manifest but not in changelog
486 # in manifest but not in changelog
487 linkrevs = []
487 linkrevs = []
488
488
489 if linkrevs:
489 if linkrevs:
490 lr = linkrevs[0]
490 lr = linkrevs[0]
491 else:
491 else:
492 lr = None
492 lr = None
493
493
494 try:
494 try:
495 fl = repo.file(f)
495 fl = repo.file(f)
496 except error.StorageError as e:
496 except error.StorageError as e:
497 self._err(lr, _(b"broken revlog! (%s)") % e, f)
497 self._err(lr, _(b"broken revlog! (%s)") % e, f)
498 continue
498 continue
499
499
500 for ff in fl.files():
500 for ff in fl.files():
501 try:
501 try:
502 storefiles.remove(ff)
502 storefiles.remove(ff)
503 except KeyError:
503 except KeyError:
504 if self.warnorphanstorefiles:
504 if self.warnorphanstorefiles:
505 msg = _(b" warning: revlog '%s' not in fncache!")
505 msg = _(b" warning: revlog '%s' not in fncache!")
506 self._warn(msg % ff)
506 self._warn(msg % ff)
507 self.fncachewarned = True
507 self.fncachewarned = True
508
508
509 if not len(fl) and (self.havecl or self.havemf):
509 if not len(fl) and (self.havecl or self.havemf):
510 self._err(lr, _(b"empty or missing %s") % f)
510 self._err(lr, _(b"empty or missing %s") % f)
511 else:
511 else:
512 # Guard against implementations not setting this.
512 # Guard against implementations not setting this.
513 state[b'skipread'] = set()
513 state[b'skipread'] = set()
514 state[b'safe_renamed'] = set()
514 state[b'safe_renamed'] = set()
515
515
516 for problem in fl.verifyintegrity(state):
516 for problem in fl.verifyintegrity(state):
517 if problem.node is not None:
517 if problem.node is not None:
518 linkrev = fl.linkrev(fl.rev(problem.node))
518 linkrev = fl.linkrev(fl.rev(problem.node))
519 else:
519 else:
520 linkrev = None
520 linkrev = None
521
521
522 if problem.warning:
522 if problem.warning:
523 self._warn(problem.warning)
523 self._warn(problem.warning)
524 elif problem.error:
524 elif problem.error:
525 linkrev_msg = linkrev if linkrev is not None else lr
525 linkrev_msg = linkrev if linkrev is not None else lr
526 self._err(linkrev_msg, problem.error, f)
526 self._err(linkrev_msg, problem.error, f)
527 else:
527 else:
528 raise error.ProgrammingError(
528 raise error.ProgrammingError(
529 b'problem instance does not set warning or error '
529 b'problem instance does not set warning or error '
530 b'attribute: %s' % problem.msg
530 b'attribute: %s' % problem.msg
531 )
531 )
532
532
533 seen = {}
533 seen = {}
534 for i in fl:
534 for i in fl:
535 revisions += 1
535 revisions += 1
536 n = fl.node(i)
536 n = fl.node(i)
537 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
537 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
538 if f in filenodes:
538 if f in filenodes:
539 if havemf and n not in filenodes[f]:
539 if havemf and n not in filenodes[f]:
540 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
540 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
541 else:
541 else:
542 del filenodes[f][n]
542 del filenodes[f][n]
543
543
544 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
544 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
545 continue
545 continue
546
546
547 # check renames
547 # check renames
548 try:
548 try:
549 # This requires resolving fulltext (at least on revlogs,
549 # This requires resolving fulltext (at least on revlogs,
550 # though not with LFS revisions). We may want
550 # though not with LFS revisions). We may want
551 # ``verifyintegrity()`` to pass a set of nodes with
551 # ``verifyintegrity()`` to pass a set of nodes with
552 # rename metadata as an optimization.
552 # rename metadata as an optimization.
553 rp = fl.renamed(n)
553 rp = fl.renamed(n)
554 if rp:
554 if rp:
555 if lr is not None and ui.verbose:
555 if lr is not None and ui.verbose:
556 ctx = lrugetctx(lr)
556 ctx = lrugetctx(lr)
557 if not any(rp[0] in pctx for pctx in ctx.parents()):
557 if not any(rp[0] in pctx for pctx in ctx.parents()):
558 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
558 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
559 fl2 = repo.file(rp[0])
559 fl2 = repo.file(rp[0])
560 if not len(fl2):
560 if not len(fl2):
561 m = _(b"empty or missing copy source revlog %s:%s")
561 m = _(b"empty or missing copy source revlog %s:%s")
562 self._err(lr, m % (rp[0], short(rp[1])), f)
562 self._err(lr, m % (rp[0], short(rp[1])), f)
563 elif rp[1] == self.repo.nullid:
563 elif rp[1] == self.repo.nullid:
564 msg = WARN_NULLID_COPY_SOURCE
564 msg = WARN_NULLID_COPY_SOURCE
565 msg %= (f, lr, rp[0], short(rp[1]))
565 msg %= (f, lr, rp[0], short(rp[1]))
566 ui.note(msg)
566 ui.note(msg)
567 else:
567 else:
568 fl2.rev(rp[1])
568 fl2.rev(rp[1])
569 except Exception as inst:
569 except Exception as inst:
570 self._exc(
570 self._exc(
571 lr, _(b"checking rename of %s") % short(n), inst, f
571 lr, _(b"checking rename of %s") % short(n), inst, f
572 )
572 )
573
573
574 # cross-check
574 # cross-check
575 if f in filenodes:
575 if f in filenodes:
576 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
576 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
577 for lr, node in sorted(fns):
577 for lr, node in sorted(fns):
578 self._err(
578 msg = _(b"manifest refers to unknown revision %s")
579 lr,
579 self._err(lr, msg % short(node), f)
580 _(b"manifest refers to unknown revision %s")
581 % short(node),
582 f,
583 )
584 progress.complete()
580 progress.complete()
585
581
586 if self.warnorphanstorefiles:
582 if self.warnorphanstorefiles:
587 for f in sorted(storefiles):
583 for f in sorted(storefiles):
588 self._warn(_(b"warning: orphan data file '%s'") % f)
584 self._warn(_(b"warning: orphan data file '%s'") % f)
589
585
590 return len(files), revisions
586 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now