##// END OF EJS Templates
verify: filter the candidate list for broken linkrevs
Matt Mackall -
r9657:96c803e9 default
parent child Browse files
Show More
@@ -1,269 +1,274 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2, incorporated herein by reference.
7 7
8 8 from node import nullid, short
9 9 from i18n import _
10 10 import revlog, util, error
11 11
def verify(repo):
    """Run the integrity check on repo while holding its lock.

    The lock obtained from repo.lock() is released whether or not the
    check raises. Returns whatever _verify(repo) returns.
    """
    held = repo.lock()
    try:
        outcome = _verify(repo)
    finally:
        held.release()
    return outcome
18 18
def _verify(repo):
    """Check changelog, manifest and filelogs of repo for consistency.

    Walks the changelog, then the manifest, then every filelog, reporting
    problems through repo.ui. Problems are counted as errors (via err/exc)
    or warnings (via warn).

    Returns 1 if at least one integrity error was recorded; otherwise falls
    off the end and returns None.

    Raises util.Abort when repo.cancopy() is false.
    """
    mflinkrevs = {}    # manifest node -> changelog revs that reference it
    filelinkrevs = {}  # filename -> changelog revs that list the file
    filenodes = {}     # filename -> {file node: linkrev of manifest entry}
    revisions = 0
    badrevs = set()
    # one-element lists so the nested helpers can mutate the counters
    errors = [0]
    warnings = [0]
    ui = repo.ui
    cl = repo.changelog
    mf = repo.manifest
    lrugetctx = util.lrucachefunc(repo.changectx)

    if not repo.cancopy():
        raise util.Abort(_("cannot verify bundle or remote repos"))

    def err(linkrev, msg, filename=None):
        """Report an integrity error and remember linkrev as damaged."""
        if linkrev is not None:
            badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        ui.warn(" " + msg + "\n")
        errors[0] += 1

    def exc(linkrev, msg, inst, filename=None):
        """Report exception inst as an error; re-raise on interrupt.

        Must be called from inside an except block: the bare raise
        re-raises the exception currently being handled.
        """
        if isinstance(inst, KeyboardInterrupt):
            ui.warn(_("interrupted"))
            raise
        err(linkrev, "%s: %s" % (msg, inst), filename)

    def warn(msg):
        """Emit a warning line and bump the warning counter."""
        ui.warn(msg + "\n")
        warnings[0] += 1

    def checklog(obj, name, linkrev):
        """Check size consistency and format version of revlog obj."""
        if not len(obj) and (havecl or havemf):
            err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            err(None, _("index contains %d extra bytes") % d[1], name)

        # warn when this revlog's format differs from the changelog's
        if obj.version != revlog.REVLOGV0:
            if not revlogv1:
                warn(_("warning: `%s' uses revlog format 1") % name)
        elif revlogv1:
            warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(obj, i, node, seen, linkrevs, f):
        """Check linkrev, parents and uniqueness of revision i of obj.

        seen maps already-visited nodes to their revision numbers and is
        updated with node. linkrevs is the list of changelog revisions
        expected to introduce node. Returns the entry's linkrev, or None
        when the stored linkrev cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(cl):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates,
                        # but let KeyboardInterrupt/SystemExit propagate
                        pass
                warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                err(lr, _("unknown parent 1 %s of %s") %
                    (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                err(lr, _("unknown parent 2 %s of %s") %
                    (short(p2), short(node)), f)
        except Exception as inst:
            exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    revlogv1 = cl.version != revlog.REVLOGV0
    if ui.verbose or not revlogv1:
        ui.status(_("repository uses revlog format %d\n") %
                  (revlogv1 and 1 or 0))

    havecl = len(cl) > 0
    havemf = len(mf) > 0

    ui.status(_("checking changesets\n"))
    seen = {}
    checklog(cl, "changelog", 0)
    for i in repo:
        n = cl.node(i)
        checkentry(cl, i, n, seen, [i], "changelog")

        try:
            changes = cl.read(n)
            # changes[0] is used as the manifest node, changes[3] as the
            # list of files touched by the changeset
            mflinkrevs.setdefault(changes[0], []).append(i)
            for f in changes[3]:
                filelinkrevs.setdefault(f, []).append(i)
        except Exception as inst:
            exc(i, _("unpacking changeset %s") % short(n), inst)

    ui.status(_("checking manifests\n"))
    seen = {}
    checklog(mf, "manifest", 0)
    for i in mf:
        n = mf.node(i)
        lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
        if n in mflinkrevs:
            # whatever remains in mflinkrevs afterwards was referenced by
            # a changeset but never found in the manifest log
            del mflinkrevs[n]
        else:
            err(lr, _("%s not in changesets") % short(n), "manifest")

        try:
            for f, fn in mf.readdelta(n).iteritems():
                if not f:
                    err(lr, _("file without name in manifest"))
                elif f != "/dev/null":
                    # keep the linkrev of the first manifest seen with fn
                    filenodes.setdefault(f, {}).setdefault(fn, lr)
        except Exception as inst:
            exc(lr, _("reading manifest delta %s") % short(n), inst)

    ui.status(_("crosschecking files in changesets and manifests\n"))

    if havemf:
        unknown = sorted([(c, m) for m in mflinkrevs for c in mflinkrevs[m]])
        for c, m in unknown:
            err(c, _("changeset refers to unknown manifest %s") % short(m))
        mflinkrevs = None # del is bad here due to scope issues

        for f in sorted(filelinkrevs):
            if f not in filenodes:
                lr = filelinkrevs[f][0]
                err(lr, _("in changeset but not in manifest"), f)

    if havecl:
        for f in sorted(filenodes):
            if f not in filelinkrevs:
                try:
                    fl = repo.file(f)
                    lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                except Exception:
                    # the filelog may be unreadable; report without a rev
                    lr = None
                err(lr, _("in manifest but not in changeset"), f)

    ui.status(_("checking files\n"))

    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            err(None, _("cannot decode filename '%s'") % f2)
        elif size > 0:
            storefiles.add(f)

    files = sorted(set(filenodes) | set(filelinkrevs))
    for f in files:
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.RevlogError as e:
            err(lr, _("broken revlog! (%s)") % e, f)
            continue

        # every store file claimed by a filelog is removed here; the
        # leftovers are reported as orphans at the end
        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                err(lr, _("missing revlog!"), ff)

        checklog(fl, f, lr)
        seen = {}
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    del filenodes[f][n]

            # verify contents
            try:
                t = fl.read(n)
                rp = fl.renamed(n)
                if len(t) != fl.size(i):
                    if len(fl.revision(n)) != fl.size(i):
                        err(lr, _("unpacked size is %s, %s expected") %
                            (len(t), fl.size(i)), f)
            except Exception as inst:
                exc(lr, _("unpacking %s") % short(n), inst, f)

            # check renames
            try:
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        found = False
                        for pctx in ctx.parents():
                            if rp[0] in pctx:
                                found = True
                                break
                        if not found:
                            warn(_("warning: copy source of '%s' not"
                                   " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        err(lr, _("empty or missing copy source revlog %s:%s")
                            % (rp[0], short(rp[1])), f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # lookup only; a failure is reported by exc below
                        fl2.rev(rp[1])
            except Exception as inst:
                exc(lr, _("checking rename of %s") % short(n), inst, f)

        # cross-check: nodes still listed for f were named by a manifest
        # but never found in the filelog
        if f in filenodes:
            missing = [(mlr, fnode)
                       for fnode, mlr in filenodes[f].iteritems()]
            for mlr, fnode in sorted(missing):
                err(mlr, _("%s in manifests not found") % short(fnode), f)

    for f in storefiles:
        warn(_("warning: orphan revlog '%s'") % f)

    ui.status(_("%d files, %d changesets, %d total revisions\n") %
              (len(files), len(cl), revisions))
    if warnings[0]:
        ui.warn(_("%d warnings encountered!\n") % warnings[0])
    if errors[0]:
        ui.warn(_("%d integrity errors encountered!\n") % errors[0])
        if badrevs:
            ui.warn(_("(first damaged changeset appears to be %d)\n")
                    % min(badrevs))
        return 1
General Comments 0
You need to be logged in to leave comments. Login now