##// END OF EJS Templates
verify: report censored nodes if configured policy is abort
Mike Edgar -
r22933:3a60cd44 default
parent child Browse files
Show More
@@ -1,318 +1,321
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from node import nullid, short
8 from node import nullid, short
9 from i18n import _
9 from i18n import _
10 import os
10 import os
11 import revlog, util, error
11 import revlog, util, error
12
12
def verify(repo):
    """Run the integrity check on repo while holding the repository lock.

    The lock returned by repo.lock() is released whether or not the check
    raises. Returns whatever _verify(repo) returns.
    """
    repolock = repo.lock()
    try:
        result = _verify(repo)
    finally:
        # always give the lock back, even when verification blows up
        repolock.release()
    return result
19
19
20 def _normpath(f):
20 def _normpath(f):
21 # under hg < 2.4, convert didn't sanitize paths properly, so a
21 # under hg < 2.4, convert didn't sanitize paths properly, so a
22 # converted repo may contain repeated slashes
22 # converted repo may contain repeated slashes
23 while '//' in f:
23 while '//' in f:
24 f = f.replace('//', '/')
24 f = f.replace('//', '/')
25 return f
25 return f
26
26
27 def _verify(repo):
27 def _verify(repo):
28 repo = repo.unfiltered()
28 repo = repo.unfiltered()
29 mflinkrevs = {}
29 mflinkrevs = {}
30 filelinkrevs = {}
30 filelinkrevs = {}
31 filenodes = {}
31 filenodes = {}
32 revisions = 0
32 revisions = 0
33 badrevs = set()
33 badrevs = set()
34 errors = [0]
34 errors = [0]
35 warnings = [0]
35 warnings = [0]
36 ui = repo.ui
36 ui = repo.ui
37 cl = repo.changelog
37 cl = repo.changelog
38 mf = repo.manifest
38 mf = repo.manifest
39 lrugetctx = util.lrucachefunc(repo.changectx)
39 lrugetctx = util.lrucachefunc(repo.changectx)
40
40
41 if not repo.url().startswith('file:'):
41 if not repo.url().startswith('file:'):
42 raise util.Abort(_("cannot verify bundle or remote repos"))
42 raise util.Abort(_("cannot verify bundle or remote repos"))
43
43
44 def err(linkrev, msg, filename=None):
44 def err(linkrev, msg, filename=None):
45 if linkrev is not None:
45 if linkrev is not None:
46 badrevs.add(linkrev)
46 badrevs.add(linkrev)
47 else:
47 else:
48 linkrev = '?'
48 linkrev = '?'
49 msg = "%s: %s" % (linkrev, msg)
49 msg = "%s: %s" % (linkrev, msg)
50 if filename:
50 if filename:
51 msg = "%s@%s" % (filename, msg)
51 msg = "%s@%s" % (filename, msg)
52 ui.warn(" " + msg + "\n")
52 ui.warn(" " + msg + "\n")
53 errors[0] += 1
53 errors[0] += 1
54
54
55 def exc(linkrev, msg, inst, filename=None):
55 def exc(linkrev, msg, inst, filename=None):
56 if isinstance(inst, KeyboardInterrupt):
56 if isinstance(inst, KeyboardInterrupt):
57 ui.warn(_("interrupted"))
57 ui.warn(_("interrupted"))
58 raise
58 raise
59 if not str(inst):
59 if not str(inst):
60 inst = repr(inst)
60 inst = repr(inst)
61 err(linkrev, "%s: %s" % (msg, inst), filename)
61 err(linkrev, "%s: %s" % (msg, inst), filename)
62
62
63 def warn(msg):
63 def warn(msg):
64 ui.warn(msg + "\n")
64 ui.warn(msg + "\n")
65 warnings[0] += 1
65 warnings[0] += 1
66
66
67 def checklog(obj, name, linkrev):
67 def checklog(obj, name, linkrev):
68 if not len(obj) and (havecl or havemf):
68 if not len(obj) and (havecl or havemf):
69 err(linkrev, _("empty or missing %s") % name)
69 err(linkrev, _("empty or missing %s") % name)
70 return
70 return
71
71
72 d = obj.checksize()
72 d = obj.checksize()
73 if d[0]:
73 if d[0]:
74 err(None, _("data length off by %d bytes") % d[0], name)
74 err(None, _("data length off by %d bytes") % d[0], name)
75 if d[1]:
75 if d[1]:
76 err(None, _("index contains %d extra bytes") % d[1], name)
76 err(None, _("index contains %d extra bytes") % d[1], name)
77
77
78 if obj.version != revlog.REVLOGV0:
78 if obj.version != revlog.REVLOGV0:
79 if not revlogv1:
79 if not revlogv1:
80 warn(_("warning: `%s' uses revlog format 1") % name)
80 warn(_("warning: `%s' uses revlog format 1") % name)
81 elif revlogv1:
81 elif revlogv1:
82 warn(_("warning: `%s' uses revlog format 0") % name)
82 warn(_("warning: `%s' uses revlog format 0") % name)
83
83
84 def checkentry(obj, i, node, seen, linkrevs, f):
84 def checkentry(obj, i, node, seen, linkrevs, f):
85 lr = obj.linkrev(obj.rev(node))
85 lr = obj.linkrev(obj.rev(node))
86 if lr < 0 or (havecl and lr not in linkrevs):
86 if lr < 0 or (havecl and lr not in linkrevs):
87 if lr < 0 or lr >= len(cl):
87 if lr < 0 or lr >= len(cl):
88 msg = _("rev %d points to nonexistent changeset %d")
88 msg = _("rev %d points to nonexistent changeset %d")
89 else:
89 else:
90 msg = _("rev %d points to unexpected changeset %d")
90 msg = _("rev %d points to unexpected changeset %d")
91 err(None, msg % (i, lr), f)
91 err(None, msg % (i, lr), f)
92 if linkrevs:
92 if linkrevs:
93 if f and len(linkrevs) > 1:
93 if f and len(linkrevs) > 1:
94 try:
94 try:
95 # attempt to filter down to real linkrevs
95 # attempt to filter down to real linkrevs
96 linkrevs = [l for l in linkrevs
96 linkrevs = [l for l in linkrevs
97 if lrugetctx(l)[f].filenode() == node]
97 if lrugetctx(l)[f].filenode() == node]
98 except Exception:
98 except Exception:
99 pass
99 pass
100 warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
100 warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
101 lr = None # can't be trusted
101 lr = None # can't be trusted
102
102
103 try:
103 try:
104 p1, p2 = obj.parents(node)
104 p1, p2 = obj.parents(node)
105 if p1 not in seen and p1 != nullid:
105 if p1 not in seen and p1 != nullid:
106 err(lr, _("unknown parent 1 %s of %s") %
106 err(lr, _("unknown parent 1 %s of %s") %
107 (short(p1), short(node)), f)
107 (short(p1), short(node)), f)
108 if p2 not in seen and p2 != nullid:
108 if p2 not in seen and p2 != nullid:
109 err(lr, _("unknown parent 2 %s of %s") %
109 err(lr, _("unknown parent 2 %s of %s") %
110 (short(p2), short(node)), f)
110 (short(p2), short(node)), f)
111 except Exception, inst:
111 except Exception, inst:
112 exc(lr, _("checking parents of %s") % short(node), inst, f)
112 exc(lr, _("checking parents of %s") % short(node), inst, f)
113
113
114 if node in seen:
114 if node in seen:
115 err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
115 err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
116 seen[node] = i
116 seen[node] = i
117 return lr
117 return lr
118
118
119 if os.path.exists(repo.sjoin("journal")):
119 if os.path.exists(repo.sjoin("journal")):
120 ui.warn(_("abandoned transaction found - run hg recover\n"))
120 ui.warn(_("abandoned transaction found - run hg recover\n"))
121
121
122 revlogv1 = cl.version != revlog.REVLOGV0
122 revlogv1 = cl.version != revlog.REVLOGV0
123 if ui.verbose or not revlogv1:
123 if ui.verbose or not revlogv1:
124 ui.status(_("repository uses revlog format %d\n") %
124 ui.status(_("repository uses revlog format %d\n") %
125 (revlogv1 and 1 or 0))
125 (revlogv1 and 1 or 0))
126
126
127 havecl = len(cl) > 0
127 havecl = len(cl) > 0
128 havemf = len(mf) > 0
128 havemf = len(mf) > 0
129
129
130 ui.status(_("checking changesets\n"))
130 ui.status(_("checking changesets\n"))
131 refersmf = False
131 refersmf = False
132 seen = {}
132 seen = {}
133 checklog(cl, "changelog", 0)
133 checklog(cl, "changelog", 0)
134 total = len(repo)
134 total = len(repo)
135 for i in repo:
135 for i in repo:
136 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
136 ui.progress(_('checking'), i, total=total, unit=_('changesets'))
137 n = cl.node(i)
137 n = cl.node(i)
138 checkentry(cl, i, n, seen, [i], "changelog")
138 checkentry(cl, i, n, seen, [i], "changelog")
139
139
140 try:
140 try:
141 changes = cl.read(n)
141 changes = cl.read(n)
142 if changes[0] != nullid:
142 if changes[0] != nullid:
143 mflinkrevs.setdefault(changes[0], []).append(i)
143 mflinkrevs.setdefault(changes[0], []).append(i)
144 refersmf = True
144 refersmf = True
145 for f in changes[3]:
145 for f in changes[3]:
146 filelinkrevs.setdefault(_normpath(f), []).append(i)
146 filelinkrevs.setdefault(_normpath(f), []).append(i)
147 except Exception, inst:
147 except Exception, inst:
148 refersmf = True
148 refersmf = True
149 exc(i, _("unpacking changeset %s") % short(n), inst)
149 exc(i, _("unpacking changeset %s") % short(n), inst)
150 ui.progress(_('checking'), None)
150 ui.progress(_('checking'), None)
151
151
152 ui.status(_("checking manifests\n"))
152 ui.status(_("checking manifests\n"))
153 seen = {}
153 seen = {}
154 if refersmf:
154 if refersmf:
155 # Do not check manifest if there are only changelog entries with
155 # Do not check manifest if there are only changelog entries with
156 # null manifests.
156 # null manifests.
157 checklog(mf, "manifest", 0)
157 checklog(mf, "manifest", 0)
158 total = len(mf)
158 total = len(mf)
159 for i in mf:
159 for i in mf:
160 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
160 ui.progress(_('checking'), i, total=total, unit=_('manifests'))
161 n = mf.node(i)
161 n = mf.node(i)
162 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
162 lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
163 if n in mflinkrevs:
163 if n in mflinkrevs:
164 del mflinkrevs[n]
164 del mflinkrevs[n]
165 else:
165 else:
166 err(lr, _("%s not in changesets") % short(n), "manifest")
166 err(lr, _("%s not in changesets") % short(n), "manifest")
167
167
168 try:
168 try:
169 for f, fn in mf.readdelta(n).iteritems():
169 for f, fn in mf.readdelta(n).iteritems():
170 if not f:
170 if not f:
171 err(lr, _("file without name in manifest"))
171 err(lr, _("file without name in manifest"))
172 elif f != "/dev/null":
172 elif f != "/dev/null":
173 filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
173 filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
174 except Exception, inst:
174 except Exception, inst:
175 exc(lr, _("reading manifest delta %s") % short(n), inst)
175 exc(lr, _("reading manifest delta %s") % short(n), inst)
176 ui.progress(_('checking'), None)
176 ui.progress(_('checking'), None)
177
177
178 ui.status(_("crosschecking files in changesets and manifests\n"))
178 ui.status(_("crosschecking files in changesets and manifests\n"))
179
179
180 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
180 total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
181 count = 0
181 count = 0
182 if havemf:
182 if havemf:
183 for c, m in sorted([(c, m) for m in mflinkrevs
183 for c, m in sorted([(c, m) for m in mflinkrevs
184 for c in mflinkrevs[m]]):
184 for c in mflinkrevs[m]]):
185 count += 1
185 count += 1
186 if m == nullid:
186 if m == nullid:
187 continue
187 continue
188 ui.progress(_('crosschecking'), count, total=total)
188 ui.progress(_('crosschecking'), count, total=total)
189 err(c, _("changeset refers to unknown manifest %s") % short(m))
189 err(c, _("changeset refers to unknown manifest %s") % short(m))
190 mflinkrevs = None # del is bad here due to scope issues
190 mflinkrevs = None # del is bad here due to scope issues
191
191
192 for f in sorted(filelinkrevs):
192 for f in sorted(filelinkrevs):
193 count += 1
193 count += 1
194 ui.progress(_('crosschecking'), count, total=total)
194 ui.progress(_('crosschecking'), count, total=total)
195 if f not in filenodes:
195 if f not in filenodes:
196 lr = filelinkrevs[f][0]
196 lr = filelinkrevs[f][0]
197 err(lr, _("in changeset but not in manifest"), f)
197 err(lr, _("in changeset but not in manifest"), f)
198
198
199 if havecl:
199 if havecl:
200 for f in sorted(filenodes):
200 for f in sorted(filenodes):
201 count += 1
201 count += 1
202 ui.progress(_('crosschecking'), count, total=total)
202 ui.progress(_('crosschecking'), count, total=total)
203 if f not in filelinkrevs:
203 if f not in filelinkrevs:
204 try:
204 try:
205 fl = repo.file(f)
205 fl = repo.file(f)
206 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
206 lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
207 except Exception:
207 except Exception:
208 lr = None
208 lr = None
209 err(lr, _("in manifest but not in changeset"), f)
209 err(lr, _("in manifest but not in changeset"), f)
210
210
211 ui.progress(_('crosschecking'), None)
211 ui.progress(_('crosschecking'), None)
212
212
213 ui.status(_("checking files\n"))
213 ui.status(_("checking files\n"))
214
214
215 storefiles = set()
215 storefiles = set()
216 for f, f2, size in repo.store.datafiles():
216 for f, f2, size in repo.store.datafiles():
217 if not f:
217 if not f:
218 err(None, _("cannot decode filename '%s'") % f2)
218 err(None, _("cannot decode filename '%s'") % f2)
219 elif size > 0 or not revlogv1:
219 elif size > 0 or not revlogv1:
220 storefiles.add(_normpath(f))
220 storefiles.add(_normpath(f))
221
221
222 files = sorted(set(filenodes) | set(filelinkrevs))
222 files = sorted(set(filenodes) | set(filelinkrevs))
223 total = len(files)
223 total = len(files)
224 for i, f in enumerate(files):
224 for i, f in enumerate(files):
225 ui.progress(_('checking'), i, item=f, total=total)
225 ui.progress(_('checking'), i, item=f, total=total)
226 try:
226 try:
227 linkrevs = filelinkrevs[f]
227 linkrevs = filelinkrevs[f]
228 except KeyError:
228 except KeyError:
229 # in manifest but not in changelog
229 # in manifest but not in changelog
230 linkrevs = []
230 linkrevs = []
231
231
232 if linkrevs:
232 if linkrevs:
233 lr = linkrevs[0]
233 lr = linkrevs[0]
234 else:
234 else:
235 lr = None
235 lr = None
236
236
237 try:
237 try:
238 fl = repo.file(f)
238 fl = repo.file(f)
239 except error.RevlogError, e:
239 except error.RevlogError, e:
240 err(lr, _("broken revlog! (%s)") % e, f)
240 err(lr, _("broken revlog! (%s)") % e, f)
241 continue
241 continue
242
242
243 for ff in fl.files():
243 for ff in fl.files():
244 try:
244 try:
245 storefiles.remove(ff)
245 storefiles.remove(ff)
246 except KeyError:
246 except KeyError:
247 err(lr, _("missing revlog!"), ff)
247 err(lr, _("missing revlog!"), ff)
248
248
249 checklog(fl, f, lr)
249 checklog(fl, f, lr)
250 seen = {}
250 seen = {}
251 rp = None
251 rp = None
252 for i in fl:
252 for i in fl:
253 revisions += 1
253 revisions += 1
254 n = fl.node(i)
254 n = fl.node(i)
255 lr = checkentry(fl, i, n, seen, linkrevs, f)
255 lr = checkentry(fl, i, n, seen, linkrevs, f)
256 if f in filenodes:
256 if f in filenodes:
257 if havemf and n not in filenodes[f]:
257 if havemf and n not in filenodes[f]:
258 err(lr, _("%s not in manifests") % (short(n)), f)
258 err(lr, _("%s not in manifests") % (short(n)), f)
259 else:
259 else:
260 del filenodes[f][n]
260 del filenodes[f][n]
261
261
262 # verify contents
262 # verify contents
263 try:
263 try:
264 l = len(fl.read(n))
264 l = len(fl.read(n))
265 rp = fl.renamed(n)
265 rp = fl.renamed(n)
266 if l != fl.size(i):
266 if l != fl.size(i):
267 if len(fl.revision(n)) != fl.size(i):
267 if len(fl.revision(n)) != fl.size(i):
268 err(lr, _("unpacked size is %s, %s expected") %
268 err(lr, _("unpacked size is %s, %s expected") %
269 (l, fl.size(i)), f)
269 (l, fl.size(i)), f)
270 except error.CensoredNodeError:
271 if ui.config("censor", "policy", "abort") == "abort":
272 err(lr, _("censored file data"), f)
270 except Exception, inst:
273 except Exception, inst:
271 exc(lr, _("unpacking %s") % short(n), inst, f)
274 exc(lr, _("unpacking %s") % short(n), inst, f)
272
275
273 # check renames
276 # check renames
274 try:
277 try:
275 if rp:
278 if rp:
276 if lr is not None and ui.verbose:
279 if lr is not None and ui.verbose:
277 ctx = lrugetctx(lr)
280 ctx = lrugetctx(lr)
278 found = False
281 found = False
279 for pctx in ctx.parents():
282 for pctx in ctx.parents():
280 if rp[0] in pctx:
283 if rp[0] in pctx:
281 found = True
284 found = True
282 break
285 break
283 if not found:
286 if not found:
284 warn(_("warning: copy source of '%s' not"
287 warn(_("warning: copy source of '%s' not"
285 " in parents of %s") % (f, ctx))
288 " in parents of %s") % (f, ctx))
286 fl2 = repo.file(rp[0])
289 fl2 = repo.file(rp[0])
287 if not len(fl2):
290 if not len(fl2):
288 err(lr, _("empty or missing copy source revlog %s:%s")
291 err(lr, _("empty or missing copy source revlog %s:%s")
289 % (rp[0], short(rp[1])), f)
292 % (rp[0], short(rp[1])), f)
290 elif rp[1] == nullid:
293 elif rp[1] == nullid:
291 ui.note(_("warning: %s@%s: copy source"
294 ui.note(_("warning: %s@%s: copy source"
292 " revision is nullid %s:%s\n")
295 " revision is nullid %s:%s\n")
293 % (f, lr, rp[0], short(rp[1])))
296 % (f, lr, rp[0], short(rp[1])))
294 else:
297 else:
295 fl2.rev(rp[1])
298 fl2.rev(rp[1])
296 except Exception, inst:
299 except Exception, inst:
297 exc(lr, _("checking rename of %s") % short(n), inst, f)
300 exc(lr, _("checking rename of %s") % short(n), inst, f)
298
301
299 # cross-check
302 # cross-check
300 if f in filenodes:
303 if f in filenodes:
301 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
304 fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
302 for lr, node in sorted(fns):
305 for lr, node in sorted(fns):
303 err(lr, _("%s in manifests not found") % short(node), f)
306 err(lr, _("%s in manifests not found") % short(node), f)
304 ui.progress(_('checking'), None)
307 ui.progress(_('checking'), None)
305
308
306 for f in storefiles:
309 for f in storefiles:
307 warn(_("warning: orphan revlog '%s'") % f)
310 warn(_("warning: orphan revlog '%s'") % f)
308
311
309 ui.status(_("%d files, %d changesets, %d total revisions\n") %
312 ui.status(_("%d files, %d changesets, %d total revisions\n") %
310 (len(files), len(cl), revisions))
313 (len(files), len(cl), revisions))
311 if warnings[0]:
314 if warnings[0]:
312 ui.warn(_("%d warnings encountered!\n") % warnings[0])
315 ui.warn(_("%d warnings encountered!\n") % warnings[0])
313 if errors[0]:
316 if errors[0]:
314 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
317 ui.warn(_("%d integrity errors encountered!\n") % errors[0])
315 if badrevs:
318 if badrevs:
316 ui.warn(_("(first damaged changeset appears to be %d)\n")
319 ui.warn(_("(first damaged changeset appears to be %d)\n")
317 % min(badrevs))
320 % min(badrevs))
318 return 1
321 return 1
General Comments 0
You need to be logged in to leave comments. Login now