##// END OF EJS Templates
verify: remove pycompat.iteritems()...
Gregory Szorc -
r49786:6f10a2d6 default
parent child Browse files
Show More
@@ -1,587 +1,587 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
3 # Copyright 2006, 2007 Olivia Mackall <olivia@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8
8
9 import os
9 import os
10
10
11 from .i18n import _
11 from .i18n import _
12 from .node import short
12 from .node import short
13 from .utils import stringutil
13 from .utils import stringutil
14
14
15 from . import (
15 from . import (
16 error,
16 error,
17 pycompat,
17 pycompat,
18 revlog,
18 revlog,
19 util,
19 util,
20 )
20 )
21
21
# Verification levels understood by ``verifier``.
# VERIFY_DEFAULT is what a ``level`` of None resolves to.
VERIFY_DEFAULT = 0
# At VERIFY_FULL (or above) the full text of every manifest revision is read
# in addition to its delta.
VERIFY_FULL = 1
24
24
25
25
def verify(repo, level=None):
    """check the integrity of ``repo`` while holding the repository lock

    A ``verifier`` is built for ``repo`` at the requested ``level`` and the
    value returned by its ``verify()`` method is handed back to the caller.
    """
    with repo.lock():
        return verifier(repo, level).verify()
30
30
31
31
def _normpath(f):
    """collapse every run of consecutive slashes in ``f`` into a single one"""
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    while True:
        collapsed = f.replace(b'//', b'/')
        if collapsed == f:
            # nothing left to squeeze
            return f
        f = collapsed
38
38
39
39
# Printed at the end of a verify run when a revlog was found to be missing
# from the fncache.
HINT_FNCACHE = _(
    b'hint: run "hg debugrebuildfncache" to recover from corrupt fncache\n'
)

# Error text for a sub-directory manifest node that the parent directory
# references but that the directory's own manifest storage does not contain.
WARN_PARENT_DIR_UNKNOWN_REV = _(
    b"parent-directory manifest refers to unknown revision %s"
)

# NOTE(review): the two copy-source messages below are not referenced in the
# part of the file visible here; presumably they serve the rename checks --
# confirm against the rest of the module.
WARN_UNKNOWN_COPY_SOURCE = _(
    b"warning: copy source of '%s' not in parents of %s"
)

WARN_NULLID_COPY_SOURCE = _(
    b"warning: %s@%s: copy source revision is nullid %s:%s\n"
)
55
55
56
56
57 class verifier(object):
57 class verifier(object):
def __init__(self, repo, level=None):
    """set up the state shared by all verification steps

    - repo: the repository to check (its unfiltered view is stored)
    - level: verification level; None selects VERIFY_DEFAULT
    """
    self.repo = repo.unfiltered()
    self.ui = repo.ui
    self.match = repo.narrowmatch()
    self._level = VERIFY_DEFAULT if level is None else level

    # bookkeeping updated by _err() and _warn()
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # what the repository contains
    self.havecl = len(repo.changelog) > 0
    rootmanifest = repo.manifestlog.getstorage(b'')
    self.havemf = len(rootmanifest) > 0
    self.revlogv1 = repo.changelog._format_version != revlog.REVLOGV0

    # cached changectx lookup on the unfiltered repository
    self.lrugetctx = util.lrucachefunc(repo.unfiltered().__getitem__)

    # flags flipped while the verification progresses
    self.refersmf = False
    self.fncachewarned = False

    # developer config: verify.skipflags
    self.skipflags = repo.ui.configint(b'verify', b'skipflags')
    self.warnorphanstorefiles = True
77
77
def _warn(self, msg):
    """record a "warning" level issue

    ``msg`` is sent to ``ui.warn`` with a trailing newline appended and the
    warning counter is bumped by one.
    """
    line = msg + b"\n"
    self.ui.warn(line)
    self.warnings = self.warnings + 1
82
82
def _err(self, linkrev, msg, filename=None):
    """record a "error" level issue

    - linkrev: changelog revision the issue is attached to, or None when it
      is unknown (displayed as "?"); known revisions are added to badrevs
    - msg: description of the issue
    - filename: optional label put in front of the message as "label@"
    """
    if linkrev is None:
        revlabel = b'?'
    else:
        self.badrevs.add(linkrev)
        revlabel = b"%d" % linkrev

    report = b"%s: %s" % (revlabel, msg)
    if filename:
        report = b"%s@%s" % (filename, report)

    self.ui.warn(b" " + report + b"\n")
    self.errors += 1
95
95
def _exc(self, linkrev, msg, inst, filename=None):
    """record exception raised during the verify process

    The exception ``inst`` is rendered as bytes (falling back to its repr
    when that rendering is empty) and reported through ``_err`` as
    "<msg>: <exception text>".
    """
    detail = stringutil.forcebytestr(inst) or pycompat.byterepr(inst)
    self._err(linkrev, b"%s: %s" % (msg, detail), filename)
102
102
def _checkrevlog(self, obj, name, linkrev):
    """verify high level property of a revlog

    - revlog is present,
    - revlog is non-empty,
    - sizes (index and data) are correct,
    - revlog's format version is correct.

    ``name`` labels the revlog in messages; ``linkrev`` is only used when
    reporting an empty or missing revlog.
    """
    isempty = not len(obj)
    if isempty and (self.havecl or self.havemf):
        self._err(linkrev, _(b"empty or missing %s") % name)
        return

    sizes = obj.checksize()
    if sizes[0]:
        self._err(None, _(b"data length off by %d bytes") % sizes[0], name)
    if sizes[1]:
        self._err(None, _(b"index contains %d extra bytes") % sizes[1], name)

    # the revlog must use the same format generation as the changelog
    newformat = obj._format_version != revlog.REVLOGV0
    if newformat and not self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 1") % name)
    elif not newformat and self.revlogv1:
        self._warn(_(b"warning: `%s' uses revlog format 0") % name)
126
126
def _checkentry(self, obj, i, node, seen, linkrevs, f):
    """verify a single revlog entry

    arguments are:
    - obj:      the source revlog
    - i:        the revision number
    - node:     the revision node id
    - seen:     {node -> revision number} of the nodes previously seen for
                this revlog; updated with this entry
    - linkrevs: [changelog-revisions] introducing "node"
    - f:        string label ("changelog", "manifest", or filename)

    Performs the following checks:
    - linkrev points to an existing changelog revision,
    - linkrev is one of the changelog revisions introducing this revision
      (only checked when the changelog is not empty),
    - both parents are either nullid or were already seen in the revlog,
    - the revision is not duplicated.

    Return the linkrev stored for the revision, or None when that linkrev
    failed the checks above and therefore cannot be trusted.
    """
    lr = obj.linkrev(obj.rev(node))
    if lr < 0 or (self.havecl and lr not in linkrevs):
        if lr < 0 or lr >= len(self.repo.changelog):
            msg = _(b"rev %d points to nonexistent changeset %d")
        else:
            msg = _(b"rev %d points to unexpected changeset %d")
        self._err(None, msg % (i, lr), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs: keep only the
                    # changesets whose version of ``f`` is really ``node``.
                    # The result is bound only once the whole filter
                    # succeeded, so the candidates are never lost.
                    linkrevs = [
                        rev
                        for rev in linkrevs
                        if self.lrugetctx(rev)[f].filenode() == node
                    ]
                except Exception:
                    # best effort only: if the lookup fails, report the
                    # unfiltered candidates instead
                    pass
            msg = _(b" (expected %s)")
            msg %= b" ".join(map(pycompat.bytestr, linkrevs))
            self._warn(msg)
        lr = None  # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 not in seen and p1 != self.repo.nullid:
            msg = _(b"unknown parent 1 %s of %s") % (short(p1), short(node))
            self._err(lr, msg, f)
        if p2 not in seen and p2 != self.repo.nullid:
            msg = _(b"unknown parent 2 %s of %s") % (short(p2), short(node))
            self._err(lr, msg, f)
    except Exception as inst:
        # any failure while reading the parents is reported as an error
        self._exc(lr, _(b"checking parents of %s") % short(node), inst, f)

    if node in seen:
        self._err(lr, _(b"duplicate revision %d (%d)") % (i, seen[node]), f)
    seen[node] = i
    return lr
184
184
def verify(self):
    """verify the content of the Mercurial repository

    All verification steps are run in turn and issues are displayed as they
    are found.  Abort if the repository url does not start with "file:".

    return 1 if any error has been encountered, 0 otherwise."""
    repo = self.repo
    ui = repo.ui

    # initial validation and generic report
    url = repo.url()
    if not url.startswith(b'file:'):
        raise error.Abort(_(b"cannot verify bundle or remote repos"))

    journal = repo.sjoin(b"journal")
    if os.path.exists(journal):
        ui.warn(_(b"abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        formatversion = 1 if self.revlogv1 else 0
        ui.status(_(b"repository uses revlog format %d\n") % formatversion)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = _(b"checked %d changesets with %d changes to %d files\n")
    ui.status(summary % (len(repo.changelog), filerevisions, totalfiles))
    if self.warnings:
        ui.warn(_(b"%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(HINT_FNCACHE)
    if not self.errors:
        return 0

    ui.warn(_(b"%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        firstbad = min(self.badrevs)
        ui.warn(_(b"(first damaged changeset appears to be %d)\n") % firstbad)
    return 1
230
230
def _verifychangelog(self):
    """verify the changelog of a repository

    The following checks are performed:
    - all of `_checkrevlog` checks,
    - all of `_checkentry` checks (for each revision),
    - each revision can be read.

    Data observed in the changesets is returned as a
    (mflinkrevs, filelinkrevs) tuple:
    - mflinkrevs:   a { manifest-node -> [changelog-rev] } mapping
    - filelinkrevs: a { file-path -> [changelog-rev] } mapping

    Only files accepted by the matcher end up in filelinkrevs.
    """
    ui = self.ui
    repo = self.repo
    match = self.match
    cl = repo.changelog

    ui.status(_(b"checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self._checkrevlog(cl, b"changelog", 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'changesets'), total=len(repo)
    )
    for rev in repo:
        progress.update(rev)
        node = cl.node(rev)
        self._checkentry(cl, rev, node, seen, [rev], b"changelog")

        try:
            changes = cl.read(node)
            mfnode = changes[0]
            if mfnode != self.repo.nullid:
                mflinkrevs.setdefault(mfnode, []).append(rev)
                self.refersmf = True
            for path in changes[3]:
                if not match(path):
                    continue
                filelinkrevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            # an unreadable changeset may reference a manifest, so the
            # manifest checks must not be skipped
            self.refersmf = True
            self._exc(rev, _(b"unpacking changeset %s") % short(node), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
278
278
def _verifymanifest(
    self, mflinkrevs, dir=b"", storefiles=None, subdirprogress=None
):
    """verify the manifestlog content

    Inputs:
    - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping;
                      entries are deleted as the nodes are found
    - dir:            a subdirectory to check (for tree manifest repo)
    - storefiles:     set of currently "orphan" files.
    - subdirprogress: a progress object

    This function checks:
    * all of `_checkrevlog` checks (for all manifest related revlogs)
    * all of `_checkentry` checks (for all manifest related revisions)
    * nodes for subdirectory exists in the sub-directory manifest
    * each manifest entry has a file path
    * each manifest node referred to in mflinkrevs exists in the manifest log

    If tree manifest is in use and a matcher is specified, only the
    sub-directories matching it will be verified.

    return a two level mapping:
        {"path" -> { filenode -> changelog-revision}}

    This mapping primarily contains entries for every file in the
    repository. In addition, when tree-manifest is used, it also contains
    the entries collected from the sub-directory manifests.

    If a matcher is provided, only matching paths will be included.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)
    atroot = not dir

    if atroot:
        self.ui.status(_(b"checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    if atroot:
        label = b"manifest"
    else:
        label = dir
        # the revlog files backing this directory are accounted for
        storefiles.difference_update(mf.files())
        if subdirprogress:  # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self._checkrevlog(mf._revlog, label, 0)
    progress = ui.makeprogress(
        _(b'checking'), unit=_(b'manifests'), total=len(mf)
    )
    for rev in mf:
        if atroot:
            progress.update(rev)
        node = mf.node(rev)
        expected = mflinkrevs.get(node, [])
        lr = self._checkentry(mf, rev, node, seen, expected, label)
        if node in mflinkrevs:
            del mflinkrevs[node]
        elif atroot:
            self._err(lr, _(b"%s not in changesets") % short(node), label)
        else:
            msg = _(b"%s not in parent-directory manifest") % short(node)
            self._err(lr, msg, label)

        try:
            delta = mfl.get(dir, node).readdelta(shallow=True)
            for name, entrynode, flag in delta.iterentries():
                if not name:
                    self._err(lr, _(b"entry without name in manifest"))
                elif name == b"/dev/null":  # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(name)
                if flag == b't':
                    # sub-directory entry: remember it for the recursion
                    if match.visitdir(fullpath):
                        subnodes = subdirnodes.setdefault(fullpath + b'/', {})
                        subnodes.setdefault(entrynode, []).append(lr)
                elif match(fullpath):
                    pathnodes = filenodes.setdefault(fullpath, {})
                    pathnodes.setdefault(entrynode, lr)
        except Exception as inst:
            self._exc(lr, _(b"reading delta %s") % short(node), inst, label)
        if self._level >= VERIFY_FULL:
            try:
                # Various issues can affect manifest. So we read each full
                # text from storage. This triggers the checks from the core
                # code (eg: hash verification, filename are ordered, etc.)
                mfl.get(dir, node).read()
            except Exception as inst:
                msg = _(b"reading full manifest %s") % short(node)
                self._exc(lr, msg, inst, label)

    if atroot:
        progress.complete()

    if self.havemf:
        # since we delete entry in `mflinkrevs` during iteration, any
        # remaining entries are "missing". We need to issue errors for them.
        missing = sorted(
            (clrev, mfnode)
            for mfnode, clrevs in mflinkrevs.items()
            for clrev in clrevs
        )
        for clrev, mfnode in missing:
            if atroot:
                msg = _(b"changeset refers to unknown revision %s")
                msg %= short(mfnode)
                self._err(clrev, msg, label)
            else:
                msg = WARN_PARENT_DIR_UNKNOWN_REV % short(mfnode)
                self._err(clrev, msg, label)

    checksubdirs = atroot and bool(subdirnodes)
    if checksubdirs:
        self.ui.status(_(b"checking directory manifests\n"))
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        undecodable = []
        entries = repo.store.datafiles(undecodable=undecodable)
        for unusedtype, path, size in entries:
            if (size > 0 or not revlogv1) and path.startswith(b'meta/'):
                storefiles.add(_normpath(path))
                subdirs.add(os.path.dirname(path))
        for path in undecodable:
            self._err(None, _(b"cannot decode filename '%s'") % path)
        subdirprogress = ui.makeprogress(
            _(b'checking'), unit=_(b'manifests'), total=len(subdirs)
        )

    for subdir, linkrevs in subdirnodes.items():
        subdirfilenodes = self._verifymanifest(
            linkrevs, subdir, storefiles, subdirprogress
        )
        for path, onefilenodes in subdirfilenodes.items():
            filenodes.setdefault(path, {}).update(onefilenodes)

    if checksubdirs:
        assert subdirprogress is not None  # help pytype
        subdirprogress.complete()
        if self.warnorphanstorefiles:
            for path in sorted(storefiles):
                self._warn(_(b"warning: orphan data file '%s'") % path)

    return filenodes
423
423
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """compare the files named by changesets with those found in manifests

    - filelinkrevs: { file-path -> [changelog-rev] } built from changesets
    - filenodes:    { file-path -> { filenode -> changelog-rev } } built
                    from manifests

    A path present in only one of the two mappings is reported as an error.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_(b"crosschecking files in changesets and manifests\n"))

    progress = ui.makeprogress(
        _(b'crosschecking'),
        unit=_(b'files'),
        total=len(filelinkrevs) + len(filenodes),
    )

    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            firstrev = filelinkrevs[path][0]
            self._err(firstrev, _(b"in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                flog = repo.file(path)
                lr = min(flog.linkrev(flog.rev(n)) for n in filenodes[path])
            except Exception:
                # the filelog cannot tell us: report without a revision
                lr = None
            self._err(lr, _(b"in manifest but not in changeset"), path)

    progress.complete()
452
452
453 def _verifyfiles(self, filenodes, filelinkrevs):
453 def _verifyfiles(self, filenodes, filelinkrevs):
454 repo = self.repo
454 repo = self.repo
455 ui = self.ui
455 ui = self.ui
456 lrugetctx = self.lrugetctx
456 lrugetctx = self.lrugetctx
457 revlogv1 = self.revlogv1
457 revlogv1 = self.revlogv1
458 havemf = self.havemf
458 havemf = self.havemf
459 ui.status(_(b"checking files\n"))
459 ui.status(_(b"checking files\n"))
460
460
461 storefiles = set()
461 storefiles = set()
462 undecodable = []
462 undecodable = []
463 for t, f, size in repo.store.datafiles(undecodable=undecodable):
463 for t, f, size in repo.store.datafiles(undecodable=undecodable):
464 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
464 if (size > 0 or not revlogv1) and f.startswith(b'data/'):
465 storefiles.add(_normpath(f))
465 storefiles.add(_normpath(f))
466 for f in undecodable:
466 for f in undecodable:
467 self._err(None, _(b"cannot decode filename '%s'") % f)
467 self._err(None, _(b"cannot decode filename '%s'") % f)
468
468
469 state = {
469 state = {
470 # TODO this assumes revlog storage for changelog.
470 # TODO this assumes revlog storage for changelog.
471 b'expectedversion': self.repo.changelog._format_version,
471 b'expectedversion': self.repo.changelog._format_version,
472 b'skipflags': self.skipflags,
472 b'skipflags': self.skipflags,
473 # experimental config: censor.policy
473 # experimental config: censor.policy
474 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
474 b'erroroncensored': ui.config(b'censor', b'policy') == b'abort',
475 }
475 }
476
476
477 files = sorted(set(filenodes) | set(filelinkrevs))
477 files = sorted(set(filenodes) | set(filelinkrevs))
478 revisions = 0
478 revisions = 0
479 progress = ui.makeprogress(
479 progress = ui.makeprogress(
480 _(b'checking'), unit=_(b'files'), total=len(files)
480 _(b'checking'), unit=_(b'files'), total=len(files)
481 )
481 )
482 for i, f in enumerate(files):
482 for i, f in enumerate(files):
483 progress.update(i, item=f)
483 progress.update(i, item=f)
484 try:
484 try:
485 linkrevs = filelinkrevs[f]
485 linkrevs = filelinkrevs[f]
486 except KeyError:
486 except KeyError:
487 # in manifest but not in changelog
487 # in manifest but not in changelog
488 linkrevs = []
488 linkrevs = []
489
489
490 if linkrevs:
490 if linkrevs:
491 lr = linkrevs[0]
491 lr = linkrevs[0]
492 else:
492 else:
493 lr = None
493 lr = None
494
494
495 try:
495 try:
496 fl = repo.file(f)
496 fl = repo.file(f)
497 except error.StorageError as e:
497 except error.StorageError as e:
498 self._err(lr, _(b"broken revlog! (%s)") % e, f)
498 self._err(lr, _(b"broken revlog! (%s)") % e, f)
499 continue
499 continue
500
500
501 for ff in fl.files():
501 for ff in fl.files():
502 try:
502 try:
503 storefiles.remove(ff)
503 storefiles.remove(ff)
504 except KeyError:
504 except KeyError:
505 if self.warnorphanstorefiles:
505 if self.warnorphanstorefiles:
506 msg = _(b" warning: revlog '%s' not in fncache!")
506 msg = _(b" warning: revlog '%s' not in fncache!")
507 self._warn(msg % ff)
507 self._warn(msg % ff)
508 self.fncachewarned = True
508 self.fncachewarned = True
509
509
510 if not len(fl) and (self.havecl or self.havemf):
510 if not len(fl) and (self.havecl or self.havemf):
511 self._err(lr, _(b"empty or missing %s") % f)
511 self._err(lr, _(b"empty or missing %s") % f)
512 else:
512 else:
513 # Guard against implementations not setting this.
513 # Guard against implementations not setting this.
514 state[b'skipread'] = set()
514 state[b'skipread'] = set()
515 state[b'safe_renamed'] = set()
515 state[b'safe_renamed'] = set()
516
516
517 for problem in fl.verifyintegrity(state):
517 for problem in fl.verifyintegrity(state):
518 if problem.node is not None:
518 if problem.node is not None:
519 linkrev = fl.linkrev(fl.rev(problem.node))
519 linkrev = fl.linkrev(fl.rev(problem.node))
520 else:
520 else:
521 linkrev = None
521 linkrev = None
522
522
523 if problem.warning:
523 if problem.warning:
524 self._warn(problem.warning)
524 self._warn(problem.warning)
525 elif problem.error:
525 elif problem.error:
526 linkrev_msg = linkrev if linkrev is not None else lr
526 linkrev_msg = linkrev if linkrev is not None else lr
527 self._err(linkrev_msg, problem.error, f)
527 self._err(linkrev_msg, problem.error, f)
528 else:
528 else:
529 raise error.ProgrammingError(
529 raise error.ProgrammingError(
530 b'problem instance does not set warning or error '
530 b'problem instance does not set warning or error '
531 b'attribute: %s' % problem.msg
531 b'attribute: %s' % problem.msg
532 )
532 )
533
533
534 seen = {}
534 seen = {}
535 for i in fl:
535 for i in fl:
536 revisions += 1
536 revisions += 1
537 n = fl.node(i)
537 n = fl.node(i)
538 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
538 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
539 if f in filenodes:
539 if f in filenodes:
540 if havemf and n not in filenodes[f]:
540 if havemf and n not in filenodes[f]:
541 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
541 self._err(lr, _(b"%s not in manifests") % (short(n)), f)
542 else:
542 else:
543 del filenodes[f][n]
543 del filenodes[f][n]
544
544
545 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
545 if n in state[b'skipread'] and n not in state[b'safe_renamed']:
546 continue
546 continue
547
547
548 # check renames
548 # check renames
549 try:
549 try:
550 # This requires resolving fulltext (at least on revlogs,
550 # This requires resolving fulltext (at least on revlogs,
551 # though not with LFS revisions). We may want
551 # though not with LFS revisions). We may want
552 # ``verifyintegrity()`` to pass a set of nodes with
552 # ``verifyintegrity()`` to pass a set of nodes with
553 # rename metadata as an optimization.
553 # rename metadata as an optimization.
554 rp = fl.renamed(n)
554 rp = fl.renamed(n)
555 if rp:
555 if rp:
556 if lr is not None and ui.verbose:
556 if lr is not None and ui.verbose:
557 ctx = lrugetctx(lr)
557 ctx = lrugetctx(lr)
558 if not any(rp[0] in pctx for pctx in ctx.parents()):
558 if not any(rp[0] in pctx for pctx in ctx.parents()):
559 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
559 self._warn(WARN_UNKNOWN_COPY_SOURCE % (f, ctx))
560 fl2 = repo.file(rp[0])
560 fl2 = repo.file(rp[0])
561 if not len(fl2):
561 if not len(fl2):
562 m = _(b"empty or missing copy source revlog %s:%s")
562 m = _(b"empty or missing copy source revlog %s:%s")
563 self._err(lr, m % (rp[0], short(rp[1])), f)
563 self._err(lr, m % (rp[0], short(rp[1])), f)
564 elif rp[1] == self.repo.nullid:
564 elif rp[1] == self.repo.nullid:
565 msg = WARN_NULLID_COPY_SOURCE
565 msg = WARN_NULLID_COPY_SOURCE
566 msg %= (f, lr, rp[0], short(rp[1]))
566 msg %= (f, lr, rp[0], short(rp[1]))
567 ui.note(msg)
567 ui.note(msg)
568 else:
568 else:
569 fl2.rev(rp[1])
569 fl2.rev(rp[1])
570 except Exception as inst:
570 except Exception as inst:
571 self._exc(
571 self._exc(
572 lr, _(b"checking rename of %s") % short(n), inst, f
572 lr, _(b"checking rename of %s") % short(n), inst, f
573 )
573 )
574
574
575 # cross-check
575 # cross-check
576 if f in filenodes:
576 if f in filenodes:
577 fns = [(v, k) for k, v in pycompat.iteritems(filenodes[f])]
577 fns = [(v, k) for k, v in filenodes[f].items()]
578 for lr, node in sorted(fns):
578 for lr, node in sorted(fns):
579 msg = _(b"manifest refers to unknown revision %s")
579 msg = _(b"manifest refers to unknown revision %s")
580 self._err(lr, msg % short(node), f)
580 self._err(lr, msg % short(node), f)
581 progress.complete()
581 progress.complete()
582
582
583 if self.warnorphanstorefiles:
583 if self.warnorphanstorefiles:
584 for f in sorted(storefiles):
584 for f in sorted(storefiles):
585 self._warn(_(b"warning: orphan data file '%s'") % f)
585 self._warn(_(b"warning: orphan data file '%s'") % f)
586
586
587 return len(files), revisions
587 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now