##// END OF EJS Templates
censor: mark experimental option
Matt Mackall -
r25846:c55eac3f default
parent child Browse files
Show More
@@ -1,326 +1,327 b''
1 # verify.py - repository integrity checking for Mercurial
1 # verify.py - repository integrity checking for Mercurial
2 #
2 #
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 #
4 #
5 # This software may be used and distributed according to the terms of the
5 # This software may be used and distributed according to the terms of the
6 # GNU General Public License version 2 or any later version.
6 # GNU General Public License version 2 or any later version.
7
7
8 from node import nullid, short
8 from node import nullid, short
9 from i18n import _
9 from i18n import _
10 import os
10 import os
11 import revlog, util, error
11 import revlog, util, error
12
12
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    The lock obtained from ``repo.lock()`` is released whether the check
    returns normally or raises. The return value is whatever ``_verify``
    returns.
    """
    repolock = repo.lock()
    try:
        result = _verify(repo)
    finally:
        repolock.release()
    return result
19
19
20 def _normpath(f):
20 def _normpath(f):
21 # under hg < 2.4, convert didn't sanitize paths properly, so a
21 # under hg < 2.4, convert didn't sanitize paths properly, so a
22 # converted repo may contain repeated slashes
22 # converted repo may contain repeated slashes
23 while '//' in f:
23 while '//' in f:
24 f = f.replace('//', '/')
24 f = f.replace('//', '/')
25 return f
25 return f
26
26
def _verify(repo):
    """Check changelog, manifest and filelogs of ``repo`` for consistency.

    The check runs in four phases, each announced through ``ui.status``:
    changesets, manifests, a cross-check of the files named by changesets
    against those named by manifests, and finally every filelog.

    Problems are reported through ``ui.warn``; errors and warnings are
    counted separately and summarised at the end.

    Returns 1 when at least one integrity error was recorded, otherwise
    falls off the end (returning None).

    Raises util.Abort when ``repo.url()`` does not start with ``file:``.
    """
    repo = repo.unfiltered()
    mflinkrevs = {}      # manifest node -> changelog revs referring to it
    filelinkrevs = {}    # normalized filename -> changelog revs listing it
    filenodes = {}       # normalized filename -> {file node: manifest linkrev}
    revisions = 0        # total number of file revisions visited
    badrevs = set()      # linkrevs that had an error reported against them
    # one-element lists so the nested helpers can update the counters
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.url().startswith('file:'):
        raise util.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        """Report an integrity error and count it.

        A linkrev of None is printed as '?' and is not added to badrevs.
        """
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        """Report exception ``inst`` as an error prefixed by ``msg``.

        KeyboardInterrupt is announced and re-raised instead. The bare
        ``raise`` relies on this helper being called from inside an
        ``except`` block.
        """
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        if not str(inst):
            # no message on the exception: fall back to its repr
            inst = repr(inst)
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        """Emit a warning line and count it."""
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        """Check size consistency and format version of revlog ``obj``."""
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's
        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        """Check linkrev, parents and uniqueness of revision ``i`` of ``obj``.

        ``seen`` maps the nodes already visited to their revision number
        and is updated with ``node``. Returns the linkrev of the entry, or
        None when that linkrev was found to be invalid.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    if os.path.exists(repo.sjoin("journal")):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                  (revlogv1 and 1 or 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    # phase 1: changelog
    ui.status(_("checking changesets\n"))
    refersmf = False
    seen = {}
    checklog(cl, "changelog", 0)
    total = len(repo)
    for i in repo:
        ui.progress(_('checking'), i, total=total, unit=_('changesets'))
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            # changes[0] is used as the manifest node and changes[3] as the
            # list of files of the changeset
            if changes[0] != nullid:
                mflinkrevs.setdefault(changes[0], []).append(i)
                refersmf = True
            for f in changes[3]:
                filelinkrevs.setdefault(_normpath(f), []).append(i)
        except Exception as inst:
            # an unreadable changeset might have referred to a manifest
            refersmf = True
            exc(i, _("unpacking changeset %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # phase 2: manifest
    ui.status(_("checking manifests\n"))
    seen = {}
    if refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        checklog(mf, "manifest", 0)
    total = len(mf)
    for i in mf:
        ui.progress(_('checking'), i, total=total, unit=_('manifests'))
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            # whatever remains in mflinkrevs afterwards is unknown to mf
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null": # ignore this in very old repos
                    filenodes.setdefault(_normpath(f), {}).setdefault(fn, lr)
        except Exception as inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)
    ui.progress(_('checking'), None)

    # phase 3: changesets versus manifests
    ui.status(_("crosschecking files in changesets and manifests\n"))

    total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
    count = 0
    if havemf:
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            count += 1
            if m == nullid:
                continue
            ui.progress(_('crosschecking'), count, total=total)
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            count += 1
            ui.progress(_('crosschecking'), count, total=total)
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    # no usable linkrev; err() reports it as '?'
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.progress(_('crosschecking'), None)

    # phase 4: filelogs
    ui.status(_("checking files\n"))

    # store files not claimed by any filelog below are reported as orphans
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0 or not revlogv1:
            storefiles.add(_normpath(f))

    fncachewarned = False
    files = sorted(set(filenodes) | set(filelinkrevs))
    total = len(files)
    for i, f in enumerate(files):
        ui.progress(_('checking'), i, item=f, total=total)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError as e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                warn(_(" warning: revlog '%s' not in fncache!") % ff)
                fncachewarned = True

        checklog(fl, f, lr)
        seen = {}
        # NOTE: ``i`` is reused for the revision number; the enclosing
        # enumerate() rebinds it at the start of the next file.
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # Reset the rename info for every revision: when reading this
            # revision fails below, the rename check must not run against
            # the copy source left over from the previous revision.
            rp = None

            # verify contents
            try:
                l = len(fl.read(n))
                rp = fl.renamed(n)
                if l != fl.size(i):
                    if len(fl.revision(n)) != fl.size(i):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (l, fl.size(i)), f)
            except error.CensoredNodeError:
                # experimental config: censor.policy
                if ui.config("censor", "policy", "abort") == "abort":
                    err(lr, _("censored file data"), f)
            except Exception as inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # result unused: a failing lookup lands in the
                        # except clause below
                        fl2.rev(rp[1])
            except Exception as inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check
        if f in filenodes:
            # nodes still listed here were named by a manifest but never
            # seen while walking the filelog
            fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                err(lr, _("%s in manifests not found") % short(node), f)
    ui.progress(_('checking'), None)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
              (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if fncachewarned:
        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                  'corrupt fncache\n'))
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1
General Comments 0
You need to be logged in to leave comments. Login now