##// END OF EJS Templates
verify: document the `_verifymanifest` method
marmoute -
r42042:5ad5a70d default
parent child Browse files
Show More
@@ -1,506 +1,533 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import (
13 from .node import (
14 nullid,
14 nullid,
15 short,
15 short,
16 )
16 )
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 util,
22 util,
23 )
23 )
24
24
def verify(repo):
    """Verify the integrity of ``repo`` while holding its lock.

    Returns whatever ``verifier.verify()`` returns (1 if any error was
    encountered, 0 otherwise).
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
28
28
29 def _normpath(f):
29 def _normpath(f):
30 # under hg < 2.4, convert didn't sanitize paths properly, so a
30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 # converted repo may contain repeated slashes
31 # converted repo may contain repeated slashes
32 while '//' in f:
32 while '//' in f:
33 f = f.replace('//', '/')
33 f = f.replace('//', '/')
34 return f
34 return f
35
35
class verifier(object):
    """Run the integrity checks of a repository and tally what they find.

    Issues are written to the ui as they are discovered and counted in
    ``self.errors`` and ``self.warnings``; ``verify()`` is the entry point.
    """

    def __init__(self, repo):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # linkrevs attached to the errors reported so far (see `_err`)
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set by `_verifychangelog` once a changeset refers to a non-null
        # manifest (or cannot be unpacked)
        self.refersmf = False
        # set when a revlog file is found to be missing from the store
        # listing; triggers the "debugrebuildfncache" hint in `verify`
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def _checkrevlog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        # Size discrepancies are reported through `_err`; the class has no
        # public `err` method, so calling `self.err` here would raise
        # AttributeError instead of recording the problem.
        if d[0]:
            self._err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                self._warn(_(" (expected %s)") % " ".join
                           (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(lr, _("unknown parent 1 %s of %s") %
                          (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self._err(lr, _("unknown parent 2 %s of %s") %
                          (short(p2), short(node)), f)
        except Exception as inst:
            self._exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are
        found.

        return 1 if any error has been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (self.revlogv1 and 1 or 0))

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of a repository

        The following checks are performed:
        - all of `_checkrevlog` checks,
        - all of `_checkentry` checks (for each revisions),
        - each revision can be read.

        The function returns some of the data observed in the changesets as a
        (mflinkrevs, filelinkrevs) tuples:
        - mflinkrevs:   is a { manifest-node -> [changelog-rev] } mapping
        - filelinkrevs: is a { file-path -> [changelog-rev] } mapping

        If a matcher was specified, filelinkrevs will only contains matched
        files.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self._checkrevlog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """verify the manifestlog content

        Inputs:
        - mflinkrevs:     a {manifest-node -> [changelog-revisions]} mapping
        - dir:            a subdirectory to check (for tree manifest repo)
        - storefiles:     set of currently "orphan" files.
        - subdirprogress: a progress object

        This function checks:
        * all of `_checkrevlog` checks (for all manifest related revlogs)
        * all of `_checkentry` checks (for all manifest related revisions)
        * nodes for subdirectory exists in the sub-directory manifest
        * each manifest entries have a file path
        * each manifest node referred in mflinkrevs exist in the manifest log

        If tree manifest is in use and a matchers is specified, only the
        sub-directories matching it will be verified.

        return a two level mapping:
            {"path" -> { filenode -> changelog-revision}}

        This mapping primarily contains entries for every files in the
        repository. In addition, when tree-manifest is used, it also contains
        sub-directory entries.

        If a matcher is provided, only matching paths will be included.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self._checkrevlog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            # entries still left in mflinkrevs after this loop are nodes
            # that were referenced but never found in this manifest revlog
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(lr, _("%s not in parent-directory manifest") %
                          short(n), label)
            else:
                self._err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self._err(c, _("parent-directory manifest refers to unknown"
                                   " revision %s") % short(m), label)
                else:
                    self._err(c, _("changeset refers to unknown revision %s") %
                              short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """compare the files seen in changesets with those seen in manifests

        Inputs:
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping
        - filenodes:    a {"path" -> { filenode -> changelog-revision}} mapping

        An error is recorded for each path present in one mapping but not in
        the other. The first direction is only checked when manifests exist
        (`havemf`), the second only when changesets exist (`havecl`).
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # the linkrev is only used to label the error below
                        lr = None
                    self._err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the file storage for every known file path

        Inputs:
        - filenodes:    a {"path" -> { filenode -> changelog-revision}} mapping
        - filelinkrevs: a { file-path -> [changelog-rev] } mapping

        For each path found in either mapping this function:
        * opens the file storage and reports it if it is broken or empty,
        * reports the problems yielded by the storage `verifyintegrity`,
        * runs `_checkentry` on every revision,
        * checks each revision is referenced by the manifests,
        * checks copy/rename sources,
        * reports manifest entries that refer to unknown file revisions.

        Entries of `filenodes` are removed as the matching revisions are
        found. Store files not claimed by any file storage are reported as
        orphans (unless `warnorphanstorefiles` is disabled).

        return a (number-of-files, number-of-file-revisions) tuple.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(_(" warning: revlog '%s' not in fncache!") %
                                   ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(linkrev if linkrev is not None else lr,
                                  problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(_("warning: copy source of '%s' not"
                                             " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(lr,
                                      _("empty or missing copy source revlog "
                                        "%s:%s") % (rp[0],
                                                    short(rp[1])),
                                      f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(lr, _("checking rename of %s") % short(n),
                              inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self._err(lr, _("manifest refers to unknown revision %s") %
                              short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now