##// END OF EJS Templates
verify: use progress helper for subdirectory progress...
Martin von Zweigbergk -
r38415:0ddbe03c default
parent child Browse files
Show More
@@ -1,492 +1,488 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from __future__ import absolute_import
8 from __future__ import absolute_import
9
9
10 import os
10 import os
11
11
12 from .i18n import _
12 from .i18n import _
13 from .node import (
13 from .node import (
14 nullid,
14 nullid,
15 short,
15 short,
16 )
16 )
17
17
18 from . import (
18 from . import (
19 error,
19 error,
20 pycompat,
20 pycompat,
21 revlog,
21 revlog,
22 scmutil,
22 scmutil,
23 util,
23 util,
24 )
24 )
25
25
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    Returns whatever ``verifier.verify()`` returns for this repository.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
29
29
30 def _normpath(f):
30 def _normpath(f):
31 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 # under hg < 2.4, convert didn't sanitize paths properly, so a
32 # converted repo may contain repeated slashes
32 # converted repo may contain repeated slashes
33 while '//' in f:
33 while '//' in f:
34 f = f.replace('//', '/')
34 f = f.replace('//', '/')
35 return f
35 return f
36
36
class verifier(object):
    """Walk the changelog, manifests and filelogs of a repository and
    report inconsistencies through the repository's ui.

    Errors and warnings are counted on the instance (``errors``,
    ``warnings``) and changelog revisions implicated in errors are
    collected in ``badrevs``.
    """

    # The match argument is always None in hg core, but e.g. the narrowhg
    # extension will pass in a matcher here.
    def __init__(self, repo, match=None):
        """Capture the unfiltered repo and initialise all counters/flags.

        ``match`` restricts which files are verified; when falsy, a
        match-everything matcher from ``scmutil.matchall`` is used.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = match or scmutil.matchall(repo)
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog._revlog) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def warn(self, msg):
        """Emit ``msg`` (newline appended) as a warning and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Emit an integrity error and count it.

        The message is rendered as ``" [filename@]linkrev: msg"``; a
        ``linkrev`` of None is shown as ``?`` and is not added to
        ``badrevs``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error prefixed by ``msg``."""
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            # exception with an empty message: fall back to its repr
            fmsg = pycompat.byterepr(inst)
        self.err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """Run whole-log sanity checks on ``obj``, labelled ``name``.

        Reports an empty log (when a changelog or manifest exists), any
        size discrepancy returned by ``obj.checksize()``, and a revlog
        format that differs from the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of ``obj`` and record it in
        ``seen``.

        Verifies the linkrev against ``linkrevs``, that both parents were
        already seen (or are null), and that ``node`` is not a duplicate.

        Returns the revision's linkrev, or None when the linkrev could
        not be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self.warn(_(" (expected %s)") % " ".join
                          (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification phase and print a summary.

        Raises error.Abort when the repository URL is not ``file:``.
        Returns 1 when integrity errors were recorded; otherwise falls
        through and returns None.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changelog revision.

        Returns ``(mflinkrevs, filelinkrevs)``: the first maps each
        non-null manifest node named by a changeset to the list of
        changelog revisions naming it; the second maps each matched,
        slash-normalized file path to the list of changelog revisions
        that list it.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset might have referred to a manifest
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """Check the manifest log for ``dir`` and, recursively, the
        directory manifests it refers to.

        ``mflinkrevs`` maps expected manifest nodes to linkrev lists;
        entries are deleted as they are found. ``storefiles`` and
        ``subdirprogress`` are only supplied by the recursive calls made
        for subdirectories (non-empty ``dir``).

        Returns a dict mapping each matched file path to a
        ``{filenode: linkrev}`` dict.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl._revlog.dirlog(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # these store files are accounted for, so they are not orphans
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        total = len(mf)
        for i in mf:
            if not dir:
                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        # flag 't' entries are collected as subdirectory
                        # manifests and verified by the recursive calls
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            ui.progress(_('checking'), None)

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self.warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files named by changesets but absent from manifests,
        and files present in manifests but named by no changeset."""
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # no usable linkrev: report without one
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog named in ``filenodes`` or ``filelinkrevs``.

        Entries of ``filenodes[f]`` are deleted as the matching filelog
        revisions are found; leftovers are reported as unknown revisions.

        Returns ``(number of files, number of file revisions visited)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total, unit=_('files'))
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self.warn(_(" warning: revlog '%s' not in fncache!") %
                                  ff)
                        self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            for rev in fl:
                # Reset per revision: if reading this revision fails or is
                # skipped via skipflags, the rename check below must not
                # reuse copy metadata left over from the previous revision.
                rp = None
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # Verify contents. 4 cases to care about:
                #
                #   common: the most common case
                #   rename: with a rename
                #   meta: file content starts with b'\1\n', the metadata
                #         header defined in filelog.py, but without a rename
                #   ext: content stored externally
                #
                # More formally, their differences are shown below:
                #
                #                       | common | rename | meta  | ext
                #  -------------------------------------------------------
                #   flags()             | 0      | 0      | 0     | not 0
                #   renamed()           | False  | True   | False | ?
                #   rawtext[0:2]=='\1\n'| False  | True   | True  | ?
                #
                # "rawtext" means the raw text stored in revlog data, which
                # could be retrieved by "revision(rev, raw=True)". "text"
                # mentioned below is "revision(rev, raw=False)".
                #
                # There are 3 different lengths stored physically:
                #  1. L1: rawsize, stored in revlog index
                #  2. L2: len(rawtext), stored in revlog data
                #  3. L3: len(text), stored in revlog data if flags==0, or
                #     possibly somewhere else if flags!=0
                #
                # L1 should be equal to L2. L3 could be different from them.
                # "text" may or may not affect commit hash depending on flag
                # processors (see revlog.addflagprocessor).
                #
                #              | common  | rename | meta  | ext
                # -------------------------------------------------
                #    rawsize() | L1      | L1     | L1    | L1
                #       size() | L1      | L2-LM  | L1(*) | L1 (?)
                # len(rawtext) | L2      | L2     | L2    | L2
                #    len(text) | L2      | L2     | L2    | L3
                #  len(read()) | L2      | L2-LM  | L2-LM | L3 (?)
                #
                # LM:  length of metadata, depending on rawtext
                # (*): not ideal, see comment in filelog.size
                # (?): could be "- len(meta)" if the resolved content has
                #      rename metadata
                #
                # Checks needed to be done:
                #  1. length check: L1 == L2, in all cases.
                #  2. hash check: depending on flag processor, we may need to
                #     use either "text" (external), or "rawtext" (in revlog).
                try:
                    skipflags = self.skipflags
                    if skipflags:
                        skipflags &= fl.flags(rev)
                    if not skipflags:
                        fl.read(n) # side effect: read content and do checkhash
                        rp = fl.renamed(n)
                    # the "L1 == L2" check
                    l1 = fl.rawsize(rev)
                    l2 = len(fl.revision(n, raw=True))
                    if l1 != l2:
                        self.err(lr, _("unpacked size is %s, %s expected") %
                                 (l2, l1), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # lookup only; a failure is reported by the
                            # except clause below
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        ui.progress(_('checking'), None)

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self.warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now