##// END OF EJS Templates
verify: small refactoring and documentation in `_verifymanifest`...
marmoute -
r42043:9c5a6af7 default
parent child Browse files
Show More
@@ -1,533 +1,535
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import (
13 from .node import (
14 nullid,
14 nullid,
15 short,
15 short,
16 )
16 )
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 util,
22 util,
23 )
23 )
24
24
def verify(repo):
    """Run a full verification of ``repo`` while holding its lock.

    The result of ``verifier.verify()`` is returned unchanged.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
28
28
29 def _normpath(f):
29 def _normpath(f):
30 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 # converted repo may contain repeated slashes
31 # converted repo may contain repeated slashes
32 while '//' in f:
32 while '//' in f:
33 f = f.replace('//', '/')
33 f = f.replace('//', '/')
34 return f
34 return f
35
35
36 class verifier(object):
36 class verifier(object):
37 def __init__(self, repo):
37 def __init__(self, repo):
38 self.repo = repo.unfiltered()
38 self.repo = repo.unfiltered()
39 self.ui = repo.ui
39 self.ui = repo.ui
40 self.match = repo.narrowmatch()
40 self.match = repo.narrowmatch()
41 self.badrevs = set()
41 self.badrevs = set()
42 self.errors = 0
42 self.errors = 0
43 self.warnings = 0
43 self.warnings = 0
44 self.havecl = len(repo.changelog) > 0
44 self.havecl = len(repo.changelog) > 0
45 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
45 self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
46 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
46 self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
47 self.lrugetctx = util.lrucachefunc(repo.__getitem__)
47 self.lrugetctx = util.lrucachefunc(repo.__getitem__)
48 self.refersmf = False
48 self.refersmf = False
49 self.fncachewarned = False
49 self.fncachewarned = False
50 # developer config: verify.skipflags
50 # developer config: verify.skipflags
51 self.skipflags = repo.ui.configint('verify', 'skipflags')
51 self.skipflags = repo.ui.configint('verify', 'skipflags')
52 self.warnorphanstorefiles = True
52 self.warnorphanstorefiles = True
53
53
54 def _warn(self, msg):
54 def _warn(self, msg):
55 """record a "warning" level issue"""
55 """record a "warning" level issue"""
56 self.ui.warn(msg + "\n")
56 self.ui.warn(msg + "\n")
57 self.warnings += 1
57 self.warnings += 1
58
58
59 def _err(self, linkrev, msg, filename=None):
59 def _err(self, linkrev, msg, filename=None):
60 """record a "error" level issue"""
60 """record a "error" level issue"""
61 if linkrev is not None:
61 if linkrev is not None:
62 self.badrevs.add(linkrev)
62 self.badrevs.add(linkrev)
63 linkrev = "%d" % linkrev
63 linkrev = "%d" % linkrev
64 else:
64 else:
65 linkrev = '?'
65 linkrev = '?'
66 msg = "%s: %s" % (linkrev, msg)
66 msg = "%s: %s" % (linkrev, msg)
67 if filename:
67 if filename:
68 msg = "%s@%s" % (filename, msg)
68 msg = "%s@%s" % (filename, msg)
69 self.ui.warn(" " + msg + "\n")
69 self.ui.warn(" " + msg + "\n")
70 self.errors += 1
70 self.errors += 1
71
71
72 def _exc(self, linkrev, msg, inst, filename=None):
72 def _exc(self, linkrev, msg, inst, filename=None):
73 """record exception raised during the verify process"""
73 """record exception raised during the verify process"""
74 fmsg = pycompat.bytestr(inst)
74 fmsg = pycompat.bytestr(inst)
75 if not fmsg:
75 if not fmsg:
76 fmsg = pycompat.byterepr(inst)
76 fmsg = pycompat.byterepr(inst)
77 self._err(linkrev, "%s: %s" % (msg, fmsg), filename)
77 self._err(linkrev, "%s: %s" % (msg, fmsg), filename)
78
78
79 def _checkrevlog(self, obj, name, linkrev):
79 def _checkrevlog(self, obj, name, linkrev):
80 """verify high level property of a revlog
80 """verify high level property of a revlog
81
81
82 - revlog is present,
82 - revlog is present,
83 - revlog is non-empty,
83 - revlog is non-empty,
84 - sizes (index and data) are correct,
84 - sizes (index and data) are correct,
85 - revlog's format version is correct.
85 - revlog's format version is correct.
86 """
86 """
87 if not len(obj) and (self.havecl or self.havemf):
87 if not len(obj) and (self.havecl or self.havemf):
88 self._err(linkrev, _("empty or missing %s") % name)
88 self._err(linkrev, _("empty or missing %s") % name)
89 return
89 return
90
90
91 d = obj.checksize()
91 d = obj.checksize()
92 if d[0]:
92 if d[0]:
93 self.err(None, _("data length off by %d bytes") % d[0], name)
93 self.err(None, _("data length off by %d bytes") % d[0], name)
94 if d[1]:
94 if d[1]:
95 self.err(None, _("index contains %d extra bytes") % d[1], name)
95 self.err(None, _("index contains %d extra bytes") % d[1], name)
96
96
97 if obj.version != revlog.REVLOGV0:
97 if obj.version != revlog.REVLOGV0:
98 if not self.revlogv1:
98 if not self.revlogv1:
99 self._warn(_("warning: `%s' uses revlog format 1") % name)
99 self._warn(_("warning: `%s' uses revlog format 1") % name)
100 elif self.revlogv1:
100 elif self.revlogv1:
101 self._warn(_("warning: `%s' uses revlog format 0") % name)
101 self._warn(_("warning: `%s' uses revlog format 0") % name)
102
102
103 def _checkentry(self, obj, i, node, seen, linkrevs, f):
103 def _checkentry(self, obj, i, node, seen, linkrevs, f):
104 """verify a single revlog entry
104 """verify a single revlog entry
105
105
106 arguments are:
106 arguments are:
107 - obj: the source revlog
107 - obj: the source revlog
108 - i: the revision number
108 - i: the revision number
109 - node: the revision node id
109 - node: the revision node id
110 - seen: nodes previously seen for this revlog
110 - seen: nodes previously seen for this revlog
111 - linkrevs: [changelog-revisions] introducing "node"
111 - linkrevs: [changelog-revisions] introducing "node"
112 - f: string label ("changelog", "manifest", or filename)
112 - f: string label ("changelog", "manifest", or filename)
113
113
114 Performs the following checks:
114 Performs the following checks:
115 - linkrev points to an existing changelog revision,
115 - linkrev points to an existing changelog revision,
116 - linkrev points to a changelog revision that introduces this revision,
116 - linkrev points to a changelog revision that introduces this revision,
117 - linkrev points to the lowest of these changesets,
117 - linkrev points to the lowest of these changesets,
118 - both parents exist in the revlog,
118 - both parents exist in the revlog,
119 - the revision is not duplicated.
119 - the revision is not duplicated.
120
120
121 Return the linkrev of the revision (or None for changelog's revisions).
121 Return the linkrev of the revision (or None for changelog's revisions).
122 """
122 """
123 lr = obj.linkrev(obj.rev(node))
123 lr = obj.linkrev(obj.rev(node))
124 if lr < 0 or (self.havecl and lr not in linkrevs):
124 if lr < 0 or (self.havecl and lr not in linkrevs):
125 if lr < 0 or lr >= len(self.repo.changelog):
125 if lr < 0 or lr >= len(self.repo.changelog):
126 msg = _("rev %d points to nonexistent changeset %d")
126 msg = _("rev %d points to nonexistent changeset %d")
127 else:
127 else:
128 msg = _("rev %d points to unexpected changeset %d")
128 msg = _("rev %d points to unexpected changeset %d")
129 self._err(None, msg % (i, lr), f)
129 self._err(None, msg % (i, lr), f)
130 if linkrevs:
130 if linkrevs:
131 if f and len(linkrevs) > 1:
131 if f and len(linkrevs) > 1:
132 try:
132 try:
133 # attempt to filter down to real linkrevs
133 # attempt to filter down to real linkrevs
134 linkrevs = [l for l in linkrevs
134 linkrevs = [l for l in linkrevs
135 if self.lrugetctx(l)[f].filenode() == node]
135 if self.lrugetctx(l)[f].filenode() == node]
136 except Exception:
136 except Exception:
137 pass
137 pass
138 self._warn(_(" (expected %s)") % " ".join
138 self._warn(_(" (expected %s)") % " ".join
139 (map(pycompat.bytestr, linkrevs)))
139 (map(pycompat.bytestr, linkrevs)))
140 lr = None # can't be trusted
140 lr = None # can't be trusted
141
141
142 try:
142 try:
143 p1, p2 = obj.parents(node)
143 p1, p2 = obj.parents(node)
144 if p1 not in seen and p1 != nullid:
144 if p1 not in seen and p1 != nullid:
145 self._err(lr, _("unknown parent 1 %s of %s") %
145 self._err(lr, _("unknown parent 1 %s of %s") %
146 (short(p1), short(node)), f)
146 (short(p1), short(node)), f)
147 if p2 not in seen and p2 != nullid:
147 if p2 not in seen and p2 != nullid:
148 self._err(lr, _("unknown parent 2 %s of %s") %
148 self._err(lr, _("unknown parent 2 %s of %s") %
149 (short(p2), short(node)), f)
149 (short(p2), short(node)), f)
150 except Exception as inst:
150 except Exception as inst:
151 self._exc(lr, _("checking parents of %s") % short(node), inst, f)
151 self._exc(lr, _("checking parents of %s") % short(node), inst, f)
152
152
153 if node in seen:
153 if node in seen:
154 self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
154 self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
155 seen[node] = i
155 seen[node] = i
156 return lr
156 return lr
157
157
158 def verify(self):
158 def verify(self):
159 """verify the content of the Mercurial repository
159 """verify the content of the Mercurial repository
160
160
161 This method run all verifications, displaying issues as they are found.
161 This method run all verifications, displaying issues as they are found.
162
162
163 return 1 if any error have been encountered, 0 otherwise."""
163 return 1 if any error have been encountered, 0 otherwise."""
164 # initial validation and generic report
164 # initial validation and generic report
165 repo = self.repo
165 repo = self.repo
166 ui = repo.ui
166 ui = repo.ui
167 if not repo.url().startswith('file:'):
167 if not repo.url().startswith('file:'):
168 raise error.Abort(_("cannot verify bundle or remote repos"))
168 raise error.Abort(_("cannot verify bundle or remote repos"))
169
169
170 if os.path.exists(repo.sjoin("journal")):
170 if os.path.exists(repo.sjoin("journal")):
171 ui.warn(_("abandoned transaction found - run hg recover\n"))
171 ui.warn(_("abandoned transaction found - run hg recover\n"))
172
172
173 if ui.verbose or not self.revlogv1:
173 if ui.verbose or not self.revlogv1:
174 ui.status(_("repository uses revlog format %d\n") %
174 ui.status(_("repository uses revlog format %d\n") %
175 (self.revlogv1 and 1 or 0))
175 (self.revlogv1 and 1 or 0))
176
176
177 # data verification
177 # data verification
178 mflinkrevs, filelinkrevs = self._verifychangelog()
178 mflinkrevs, filelinkrevs = self._verifychangelog()
179 filenodes = self._verifymanifest(mflinkrevs)
179 filenodes = self._verifymanifest(mflinkrevs)
180 del mflinkrevs
180 del mflinkrevs
181 self._crosscheckfiles(filelinkrevs, filenodes)
181 self._crosscheckfiles(filelinkrevs, filenodes)
182 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
182 totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)
183
183
184 # final report
184 # final report
185 ui.status(_("checked %d changesets with %d changes to %d files\n") %
185 ui.status(_("checked %d changesets with %d changes to %d files\n") %
186 (len(repo.changelog), filerevisions, totalfiles))
186 (len(repo.changelog), filerevisions, totalfiles))
187 if self.warnings:
187 if self.warnings:
188 ui.warn(_("%d warnings encountered!\n") % self.warnings)
188 ui.warn(_("%d warnings encountered!\n") % self.warnings)
189 if self.fncachewarned:
189 if self.fncachewarned:
190 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
190 ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
191 'corrupt fncache\n'))
191 'corrupt fncache\n'))
192 if self.errors:
192 if self.errors:
193 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
193 ui.warn(_("%d integrity errors encountered!\n") % self.errors)
194 if self.badrevs:
194 if self.badrevs:
195 ui.warn(_("(first damaged changeset appears to be %d)\n")
195 ui.warn(_("(first damaged changeset appears to be %d)\n")
196 % min(self.badrevs))
196 % min(self.badrevs))
197 return 1
197 return 1
198 return 0
198 return 0
199
199
200 def _verifychangelog(self):
200 def _verifychangelog(self):
201 """verify the changelog of a repository
201 """verify the changelog of a repository
202
202
203 The following checks are performed:
203 The following checks are performed:
204 - all of `_checkrevlog` checks,
204 - all of `_checkrevlog` checks,
205 - all of `_checkentry` checks (for each revisions),
205 - all of `_checkentry` checks (for each revisions),
206 - each revision can be read.
206 - each revision can be read.
207
207
208 The function returns some of the data observed in the changesets as a
208 The function returns some of the data observed in the changesets as a
209 (mflinkrevs, filelinkrevs) tuples:
209 (mflinkrevs, filelinkrevs) tuples:
210 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
210 - mflinkrevs: is a { manifest-node -> [changelog-rev] } mapping
211 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
211 - filelinkrevs: is a { file-path -> [changelog-rev] } mapping
212
212
213 If a matcher was specified, filelinkrevs will only contains matched
213 If a matcher was specified, filelinkrevs will only contains matched
214 files.
214 files.
215 """
215 """
216 ui = self.ui
216 ui = self.ui
217 repo = self.repo
217 repo = self.repo
218 match = self.match
218 match = self.match
219 cl = repo.changelog
219 cl = repo.changelog
220
220
221 ui.status(_("checking changesets\n"))
221 ui.status(_("checking changesets\n"))
222 mflinkrevs = {}
222 mflinkrevs = {}
223 filelinkrevs = {}
223 filelinkrevs = {}
224 seen = {}
224 seen = {}
225 self._checkrevlog(cl, "changelog", 0)
225 self._checkrevlog(cl, "changelog", 0)
226 progress = ui.makeprogress(_('checking'), unit=_('changesets'),
226 progress = ui.makeprogress(_('checking'), unit=_('changesets'),
227 total=len(repo))
227 total=len(repo))
228 for i in repo:
228 for i in repo:
229 progress.update(i)
229 progress.update(i)
230 n = cl.node(i)
230 n = cl.node(i)
231 self._checkentry(cl, i, n, seen, [i], "changelog")
231 self._checkentry(cl, i, n, seen, [i], "changelog")
232
232
233 try:
233 try:
234 changes = cl.read(n)
234 changes = cl.read(n)
235 if changes[0] != nullid:
235 if changes[0] != nullid:
236 mflinkrevs.setdefault(changes[0], []).append(i)
236 mflinkrevs.setdefault(changes[0], []).append(i)
237 self.refersmf = True
237 self.refersmf = True
238 for f in changes[3]:
238 for f in changes[3]:
239 if match(f):
239 if match(f):
240 filelinkrevs.setdefault(_normpath(f), []).append(i)
240 filelinkrevs.setdefault(_normpath(f), []).append(i)
241 except Exception as inst:
241 except Exception as inst:
242 self.refersmf = True
242 self.refersmf = True
243 self._exc(i, _("unpacking changeset %s") % short(n), inst)
243 self._exc(i, _("unpacking changeset %s") % short(n), inst)
244 progress.complete()
244 progress.complete()
245 return mflinkrevs, filelinkrevs
245 return mflinkrevs, filelinkrevs
246
246
247 def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
247 def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
248 subdirprogress=None):
248 subdirprogress=None):
249 """verify the manifestlog content
249 """verify the manifestlog content
250
250
251 Inputs:
251 Inputs:
252 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
252 - mflinkrevs: a {manifest-node -> [changelog-revisions]} mapping
253 - dir: a subdirectory to check (for tree manifest repo)
253 - dir: a subdirectory to check (for tree manifest repo)
254 - storefiles: set of currently "orphan" files.
254 - storefiles: set of currently "orphan" files.
255 - subdirprogress: a progress object
255 - subdirprogress: a progress object
256
256
257 This function checks:
257 This function checks:
258 * all of `_checkrevlog` checks (for all manifest related revlogs)
258 * all of `_checkrevlog` checks (for all manifest related revlogs)
259 * all of `_checkentry` checks (for all manifest related revisions)
259 * all of `_checkentry` checks (for all manifest related revisions)
260 * nodes for subdirectory exists in the sub-directory manifest
260 * nodes for subdirectory exists in the sub-directory manifest
261 * each manifest entries have a file path
261 * each manifest entries have a file path
262 * each manifest node refered in mflinkrevs exist in the manifest log
262 * each manifest node refered in mflinkrevs exist in the manifest log
263
263
264 If tree manifest is in use and a matchers is specified, only the
264 If tree manifest is in use and a matchers is specified, only the
265 sub-directories matching it will be verified.
265 sub-directories matching it will be verified.
266
266
267 return a two level mapping:
267 return a two level mapping:
268 {"path" -> { filenode -> changelog-revision}}
268 {"path" -> { filenode -> changelog-revision}}
269
269
270 This mapping primarily contains entries for every files in the
270 This mapping primarily contains entries for every files in the
271 repository. In addition, when tree-manifest is used, it also contains
271 repository. In addition, when tree-manifest is used, it also contains
272 sub-directory entries.
272 sub-directory entries.
273
273
274 If a matcher is provided, only matching paths will be included.
274 If a matcher is provided, only matching paths will be included.
275 """
275 """
276 repo = self.repo
276 repo = self.repo
277 ui = self.ui
277 ui = self.ui
278 match = self.match
278 match = self.match
279 mfl = self.repo.manifestlog
279 mfl = self.repo.manifestlog
280 mf = mfl.getstorage(dir)
280 mf = mfl.getstorage(dir)
281
281
282 if not dir:
282 if not dir:
283 self.ui.status(_("checking manifests\n"))
283 self.ui.status(_("checking manifests\n"))
284
284
285 filenodes = {}
285 filenodes = {}
286 subdirnodes = {}
286 subdirnodes = {}
287 seen = {}
287 seen = {}
288 label = "manifest"
288 label = "manifest"
289 if dir:
289 if dir:
290 label = dir
290 label = dir
291 revlogfiles = mf.files()
291 revlogfiles = mf.files()
292 storefiles.difference_update(revlogfiles)
292 storefiles.difference_update(revlogfiles)
293 if subdirprogress: # should be true since we're in a subdirectory
293 if subdirprogress: # should be true since we're in a subdirectory
294 subdirprogress.increment()
294 subdirprogress.increment()
295 if self.refersmf:
295 if self.refersmf:
296 # Do not check manifest if there are only changelog entries with
296 # Do not check manifest if there are only changelog entries with
297 # null manifests.
297 # null manifests.
298 self._checkrevlog(mf, label, 0)
298 self._checkrevlog(mf, label, 0)
299 progress = ui.makeprogress(_('checking'), unit=_('manifests'),
299 progress = ui.makeprogress(_('checking'), unit=_('manifests'),
300 total=len(mf))
300 total=len(mf))
301 for i in mf:
301 for i in mf:
302 if not dir:
302 if not dir:
303 progress.update(i)
303 progress.update(i)
304 n = mf.node(i)
304 n = mf.node(i)
305 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
305 lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
306 if n in mflinkrevs:
306 if n in mflinkrevs:
307 del mflinkrevs[n]
307 del mflinkrevs[n]
308 elif dir:
308 elif dir:
309 self._err(lr, _("%s not in parent-directory manifest") %
309 self._err(lr, _("%s not in parent-directory manifest") %
310 short(n), label)
310 short(n), label)
311 else:
311 else:
312 self._err(lr, _("%s not in changesets") % short(n), label)
312 self._err(lr, _("%s not in changesets") % short(n), label)
313
313
314 try:
314 try:
315 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
315 mfdelta = mfl.get(dir, n).readdelta(shallow=True)
316 for f, fn, fl in mfdelta.iterentries():
316 for f, fn, fl in mfdelta.iterentries():
317 if not f:
317 if not f:
318 self._err(lr, _("entry without name in manifest"))
318 self._err(lr, _("entry without name in manifest"))
319 elif f == "/dev/null": # ignore this in very old repos
319 elif f == "/dev/null": # ignore this in very old repos
320 continue
320 continue
321 fullpath = dir + _normpath(f)
321 fullpath = dir + _normpath(f)
322 if fl == 't':
322 if fl == 't':
323 if not match.visitdir(fullpath):
323 if not match.visitdir(fullpath):
324 continue
324 continue
325 subdirnodes.setdefault(fullpath + '/', {}).setdefault(
325 subdirnodes.setdefault(fullpath + '/', {}).setdefault(
326 fn, []).append(lr)
326 fn, []).append(lr)
327 else:
327 else:
328 if not match(fullpath):
328 if not match(fullpath):
329 continue
329 continue
330 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
330 filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
331 except Exception as inst:
331 except Exception as inst:
332 self._exc(lr, _("reading delta %s") % short(n), inst, label)
332 self._exc(lr, _("reading delta %s") % short(n), inst, label)
333 if not dir:
333 if not dir:
334 progress.complete()
334 progress.complete()
335
335
336 if self.havemf:
336 if self.havemf:
337 for c, m in sorted([(c, m) for m in mflinkrevs
337 # since we delete entry in `mflinkrevs` during iteration, any
338 for c in mflinkrevs[m]]):
338 # remaining entries are "missing". We need to issue errors for them.
339 changesetpairs = [(c, m) for m in mflinkrevs for c in mflinkrevs[m]]
340 for c, m in sorted(changesetpairs):
339 if dir:
341 if dir:
340 self._err(c, _("parent-directory manifest refers to unknown"
342 self._err(c, _("parent-directory manifest refers to unknown"
341 " revision %s") % short(m), label)
343 " revision %s") % short(m), label)
342 else:
344 else:
343 self._err(c, _("changeset refers to unknown revision %s") %
345 self._err(c, _("changeset refers to unknown revision %s") %
344 short(m), label)
346 short(m), label)
345
347
346 if not dir and subdirnodes:
348 if not dir and subdirnodes:
347 self.ui.status(_("checking directory manifests\n"))
349 self.ui.status(_("checking directory manifests\n"))
348 storefiles = set()
350 storefiles = set()
349 subdirs = set()
351 subdirs = set()
350 revlogv1 = self.revlogv1
352 revlogv1 = self.revlogv1
351 for f, f2, size in repo.store.datafiles():
353 for f, f2, size in repo.store.datafiles():
352 if not f:
354 if not f:
353 self._err(None, _("cannot decode filename '%s'") % f2)
355 self._err(None, _("cannot decode filename '%s'") % f2)
354 elif (size > 0 or not revlogv1) and f.startswith('meta/'):
356 elif (size > 0 or not revlogv1) and f.startswith('meta/'):
355 storefiles.add(_normpath(f))
357 storefiles.add(_normpath(f))
356 subdirs.add(os.path.dirname(f))
358 subdirs.add(os.path.dirname(f))
357 subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
359 subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
358 total=len(subdirs))
360 total=len(subdirs))
359
361
360 for subdir, linkrevs in subdirnodes.iteritems():
362 for subdir, linkrevs in subdirnodes.iteritems():
361 subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
363 subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
362 subdirprogress)
364 subdirprogress)
363 for f, onefilenodes in subdirfilenodes.iteritems():
365 for f, onefilenodes in subdirfilenodes.iteritems():
364 filenodes.setdefault(f, {}).update(onefilenodes)
366 filenodes.setdefault(f, {}).update(onefilenodes)
365
367
366 if not dir and subdirnodes:
368 if not dir and subdirnodes:
367 subdirprogress.complete()
369 subdirprogress.complete()
368 if self.warnorphanstorefiles:
370 if self.warnorphanstorefiles:
369 for f in sorted(storefiles):
371 for f in sorted(storefiles):
370 self._warn(_("warning: orphan data file '%s'") % f)
372 self._warn(_("warning: orphan data file '%s'") % f)
371
373
372 return filenodes
374 return filenodes
373
375
374 def _crosscheckfiles(self, filelinkrevs, filenodes):
376 def _crosscheckfiles(self, filelinkrevs, filenodes):
375 repo = self.repo
377 repo = self.repo
376 ui = self.ui
378 ui = self.ui
377 ui.status(_("crosschecking files in changesets and manifests\n"))
379 ui.status(_("crosschecking files in changesets and manifests\n"))
378
380
379 total = len(filelinkrevs) + len(filenodes)
381 total = len(filelinkrevs) + len(filenodes)
380 progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
382 progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
381 total=total)
383 total=total)
382 if self.havemf:
384 if self.havemf:
383 for f in sorted(filelinkrevs):
385 for f in sorted(filelinkrevs):
384 progress.increment()
386 progress.increment()
385 if f not in filenodes:
387 if f not in filenodes:
386 lr = filelinkrevs[f][0]
388 lr = filelinkrevs[f][0]
387 self._err(lr, _("in changeset but not in manifest"), f)
389 self._err(lr, _("in changeset but not in manifest"), f)
388
390
389 if self.havecl:
391 if self.havecl:
390 for f in sorted(filenodes):
392 for f in sorted(filenodes):
391 progress.increment()
393 progress.increment()
392 if f not in filelinkrevs:
394 if f not in filelinkrevs:
393 try:
395 try:
394 fl = repo.file(f)
396 fl = repo.file(f)
395 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
397 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
396 except Exception:
398 except Exception:
397 lr = None
399 lr = None
398 self._err(lr, _("in manifest but not in changeset"), f)
400 self._err(lr, _("in manifest but not in changeset"), f)
399
401
400 progress.complete()
402 progress.complete()
401
403
402 def _verifyfiles(self, filenodes, filelinkrevs):
404 def _verifyfiles(self, filenodes, filelinkrevs):
403 repo = self.repo
405 repo = self.repo
404 ui = self.ui
406 ui = self.ui
405 lrugetctx = self.lrugetctx
407 lrugetctx = self.lrugetctx
406 revlogv1 = self.revlogv1
408 revlogv1 = self.revlogv1
407 havemf = self.havemf
409 havemf = self.havemf
408 ui.status(_("checking files\n"))
410 ui.status(_("checking files\n"))
409
411
410 storefiles = set()
412 storefiles = set()
411 for f, f2, size in repo.store.datafiles():
413 for f, f2, size in repo.store.datafiles():
412 if not f:
414 if not f:
413 self._err(None, _("cannot decode filename '%s'") % f2)
415 self._err(None, _("cannot decode filename '%s'") % f2)
414 elif (size > 0 or not revlogv1) and f.startswith('data/'):
416 elif (size > 0 or not revlogv1) and f.startswith('data/'):
415 storefiles.add(_normpath(f))
417 storefiles.add(_normpath(f))
416
418
417 state = {
419 state = {
418 # TODO this assumes revlog storage for changelog.
420 # TODO this assumes revlog storage for changelog.
419 'expectedversion': self.repo.changelog.version & 0xFFFF,
421 'expectedversion': self.repo.changelog.version & 0xFFFF,
420 'skipflags': self.skipflags,
422 'skipflags': self.skipflags,
421 # experimental config: censor.policy
423 # experimental config: censor.policy
422 'erroroncensored': ui.config('censor', 'policy') == 'abort',
424 'erroroncensored': ui.config('censor', 'policy') == 'abort',
423 }
425 }
424
426
425 files = sorted(set(filenodes) | set(filelinkrevs))
427 files = sorted(set(filenodes) | set(filelinkrevs))
426 revisions = 0
428 revisions = 0
427 progress = ui.makeprogress(_('checking'), unit=_('files'),
429 progress = ui.makeprogress(_('checking'), unit=_('files'),
428 total=len(files))
430 total=len(files))
429 for i, f in enumerate(files):
431 for i, f in enumerate(files):
430 progress.update(i, item=f)
432 progress.update(i, item=f)
431 try:
433 try:
432 linkrevs = filelinkrevs[f]
434 linkrevs = filelinkrevs[f]
433 except KeyError:
435 except KeyError:
434 # in manifest but not in changelog
436 # in manifest but not in changelog
435 linkrevs = []
437 linkrevs = []
436
438
437 if linkrevs:
439 if linkrevs:
438 lr = linkrevs[0]
440 lr = linkrevs[0]
439 else:
441 else:
440 lr = None
442 lr = None
441
443
442 try:
444 try:
443 fl = repo.file(f)
445 fl = repo.file(f)
444 except error.StorageError as e:
446 except error.StorageError as e:
445 self._err(lr, _("broken revlog! (%s)") % e, f)
447 self._err(lr, _("broken revlog! (%s)") % e, f)
446 continue
448 continue
447
449
448 for ff in fl.files():
450 for ff in fl.files():
449 try:
451 try:
450 storefiles.remove(ff)
452 storefiles.remove(ff)
451 except KeyError:
453 except KeyError:
452 if self.warnorphanstorefiles:
454 if self.warnorphanstorefiles:
453 self._warn(_(" warning: revlog '%s' not in fncache!") %
455 self._warn(_(" warning: revlog '%s' not in fncache!") %
454 ff)
456 ff)
455 self.fncachewarned = True
457 self.fncachewarned = True
456
458
457 if not len(fl) and (self.havecl or self.havemf):
459 if not len(fl) and (self.havecl or self.havemf):
458 self._err(lr, _("empty or missing %s") % f)
460 self._err(lr, _("empty or missing %s") % f)
459 else:
461 else:
460 # Guard against implementations not setting this.
462 # Guard against implementations not setting this.
461 state['skipread'] = set()
463 state['skipread'] = set()
462 for problem in fl.verifyintegrity(state):
464 for problem in fl.verifyintegrity(state):
463 if problem.node is not None:
465 if problem.node is not None:
464 linkrev = fl.linkrev(fl.rev(problem.node))
466 linkrev = fl.linkrev(fl.rev(problem.node))
465 else:
467 else:
466 linkrev = None
468 linkrev = None
467
469
468 if problem.warning:
470 if problem.warning:
469 self._warn(problem.warning)
471 self._warn(problem.warning)
470 elif problem.error:
472 elif problem.error:
471 self._err(linkrev if linkrev is not None else lr,
473 self._err(linkrev if linkrev is not None else lr,
472 problem.error, f)
474 problem.error, f)
473 else:
475 else:
474 raise error.ProgrammingError(
476 raise error.ProgrammingError(
475 'problem instance does not set warning or error '
477 'problem instance does not set warning or error '
476 'attribute: %s' % problem.msg)
478 'attribute: %s' % problem.msg)
477
479
478 seen = {}
480 seen = {}
479 for i in fl:
481 for i in fl:
480 revisions += 1
482 revisions += 1
481 n = fl.node(i)
483 n = fl.node(i)
482 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
484 lr = self._checkentry(fl, i, n, seen, linkrevs, f)
483 if f in filenodes:
485 if f in filenodes:
484 if havemf and n not in filenodes[f]:
486 if havemf and n not in filenodes[f]:
485 self._err(lr, _("%s not in manifests") % (short(n)), f)
487 self._err(lr, _("%s not in manifests") % (short(n)), f)
486 else:
488 else:
487 del filenodes[f][n]
489 del filenodes[f][n]
488
490
489 if n in state['skipread']:
491 if n in state['skipread']:
490 continue
492 continue
491
493
492 # check renames
494 # check renames
493 try:
495 try:
494 # This requires resolving fulltext (at least on revlogs). We
496 # This requires resolving fulltext (at least on revlogs). We
495 # may want ``verifyintegrity()`` to pass a set of nodes with
497 # may want ``verifyintegrity()`` to pass a set of nodes with
496 # rename metadata as an optimization.
498 # rename metadata as an optimization.
497 rp = fl.renamed(n)
499 rp = fl.renamed(n)
498 if rp:
500 if rp:
499 if lr is not None and ui.verbose:
501 if lr is not None and ui.verbose:
500 ctx = lrugetctx(lr)
502 ctx = lrugetctx(lr)
501 if not any(rp[0] in pctx for pctx in ctx.parents()):
503 if not any(rp[0] in pctx for pctx in ctx.parents()):
502 self._warn(_("warning: copy source of '%s' not"
504 self._warn(_("warning: copy source of '%s' not"
503 " in parents of %s") % (f, ctx))
505 " in parents of %s") % (f, ctx))
504 fl2 = repo.file(rp[0])
506 fl2 = repo.file(rp[0])
505 if not len(fl2):
507 if not len(fl2):
506 self._err(lr,
508 self._err(lr,
507 _("empty or missing copy source revlog "
509 _("empty or missing copy source revlog "
508 "%s:%s") % (rp[0],
510 "%s:%s") % (rp[0],
509 short(rp[1])),
511 short(rp[1])),
510 f)
512 f)
511 elif rp[1] == nullid:
513 elif rp[1] == nullid:
512 ui.note(_("warning: %s@%s: copy source"
514 ui.note(_("warning: %s@%s: copy source"
513 " revision is nullid %s:%s\n")
515 " revision is nullid %s:%s\n")
514 % (f, lr, rp[0], short(rp[1])))
516 % (f, lr, rp[0], short(rp[1])))
515 else:
517 else:
516 fl2.rev(rp[1])
518 fl2.rev(rp[1])
517 except Exception as inst:
519 except Exception as inst:
518 self._exc(lr, _("checking rename of %s") % short(n),
520 self._exc(lr, _("checking rename of %s") % short(n),
519 inst, f)
521 inst, f)
520
522
521 # cross-check
523 # cross-check
522 if f in filenodes:
524 if f in filenodes:
523 fns = [(v, k) for k, v in filenodes[f].iteritems()]
525 fns = [(v, k) for k, v in filenodes[f].iteritems()]
524 for lr, node in sorted(fns):
526 for lr, node in sorted(fns):
525 self._err(lr, _("manifest refers to unknown revision %s") %
527 self._err(lr, _("manifest refers to unknown revision %s") %
526 short(node), f)
528 short(node), f)
527 progress.complete()
529 progress.complete()
528
530
529 if self.warnorphanstorefiles:
531 if self.warnorphanstorefiles:
530 for f in sorted(storefiles):
532 for f in sorted(storefiles):
531 self._warn(_("warning: orphan data file '%s'") % f)
533 self._warn(_("warning: orphan data file '%s'") % f)
532
534
533 return len(files), revisions
535 return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now