##// END OF EJS Templates
verify: move err() to be a class function...
Durham Goode -
r27447:d1b91c10 default
parent child Browse files
Show More
@@ -1,366 +1,370 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    The repository lock is taken before the check starts and is always
    released afterwards, even if the check raises.

    Returns whatever ``verifier(repo).verify()`` returns.
    """
    held = repo.lock()
    try:
        outcome = verifier(repo).verify()
    finally:
        held.release()
    return outcome
30 30
31 31 def _normpath(f):
32 32 # under hg < 2.4, convert didn't sanitize paths properly, so a
33 33 # converted repo may contain repeated slashes
34 34 while '//' in f:
35 35 f = f.replace('//', '/')
36 36 return f
37 37
38 38 def _validpath(repo, path):
39 39 """Returns False if a path should NOT be treated as part of a repo.
40 40
41 41 For all in-core cases, this returns True, as we have no way for a
42 42 path to be mentioned in the history but not actually be
43 43 relevant. For narrow clones, this is important because many
44 44 filelogs will be missing, and changelog entries may mention
45 45 modified files that are outside the narrow scope.
46 46 """
47 47 return True
48 48
class verifier(object):
    """Walk a local repository's revlogs and report integrity problems.

    Problems are written to the repository ui; running totals are kept in
    ``self.errors`` and ``self.warnings`` (single-element lists used as
    mutable counters) and the changeset revisions implicated in errors are
    collected in ``self.badrevs``.
    """

    # NOTE(review): block nesting below was reconstructed from a flattened
    # diff view of this file - confirm against the upstream revision.

    def __init__(self, repo):
        """Capture the state needed by the check from ``repo``.

        The unfiltered view of the repository is stored as ``self.repo``.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.badrevs = set()
        self.errors = [0]
        self.warnings = [0]
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set once any changeset refers to a non-null manifest (or cannot
        # be unpacked at all)
        self.refersmf = False
        # set once a revlog is found that is missing from the store's
        # file listing
        self.fncachewarned = False

    def warn(self, msg):
        """Print ``msg`` (newline appended) as a warning and count it."""
        self.ui.warn(msg + "\n")
        self.warnings[0] += 1

    def err(self, linkrev, msg, filename=None):
        """Report one integrity error and count it.

        ``linkrev`` is the changeset revision blamed for the problem; it is
        recorded in ``self.badrevs``. When it is None the revision is shown
        as ``?`` and nothing is recorded. ``filename``, when given, is
        prefixed to the message as ``filename@``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors[0] += 1

    def verify(self):
        """Check changesets, manifests and filelogs for consistency.

        The check runs in four phases: changesets, manifests, a crosscheck
        of the files named by each, and finally every filelog.

        Raises error.Abort when the repository URL does not start with
        ``file:``. Returns 1 when at least one integrity error was
        counted; otherwise falls off the end (returns None).
        """
        repo = self.repo
        mflinkrevs = {}
        filelinkrevs = {}
        filenodes = {}
        revisions = 0
        badrevs = self.badrevs
        errors = self.errors
        warnings = self.warnings
        ui = repo.ui
        cl = repo.changelog
        mf = repo.manifest
        lrugetctx = self.lrugetctx

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        def exc(linkrev, msg, inst, filename=None):
            """Report exception ``inst`` as an error; re-raise interrupts.

            Must be called from inside an ``except`` block: the bare
            ``raise`` re-raises the exception currently being handled.
            """
            if isinstance(inst, KeyboardInterrupt):
                ui.warn(_("interrupted"))
                raise
            if not str(inst):
                inst = repr(inst)
            self.err(linkrev, "%s: %s" % (msg, inst), filename)

        def checklog(obj, name, linkrev):
            """Check revlog ``obj`` for emptiness, size and format drift."""
            if not len(obj) and (havecl or havemf):
                self.err(linkrev, _("empty or missing %s") % name)
                return

            d = obj.checksize()
            if d[0]:
                self.err(None, _("data length off by %d bytes") % d[0], name)
            if d[1]:
                self.err(None, _("index contains %d extra bytes") % d[1],
                         name)

            if obj.version != revlog.REVLOGV0:
                if not revlogv1:
                    self.warn(_("warning: `%s' uses revlog format 1") % name)
            elif revlogv1:
                self.warn(_("warning: `%s' uses revlog format 0") % name)

        def checkentry(obj, i, node, seen, linkrevs, f):
            """Check linkrev, parents and uniqueness of one revlog entry.

            Records ``node`` in ``seen`` and returns the entry's linkrev,
            or None when the stored linkrev cannot be trusted.
            """
            lr = obj.linkrev(obj.rev(node))
            if lr < 0 or (havecl and lr not in linkrevs):
                if lr < 0 or lr >= len(cl):
                    msg = _("rev %d points to nonexistent changeset %d")
                else:
                    msg = _("rev %d points to unexpected changeset %d")
                self.err(None, msg % (i, lr), f)
                if linkrevs:
                    if f and len(linkrevs) > 1:
                        try:
                            # attempt to filter down to real linkrevs
                            linkrevs = [l for l in linkrevs
                                        if lrugetctx(l)[f].filenode() == node]
                        except Exception:
                            # best effort only: keep the unfiltered list
                            pass
                    self.warn(_(" (expected %s)") %
                              " ".join(map(str, linkrevs)))
                lr = None # can't be trusted

            try:
                p1, p2 = obj.parents(node)
                if p1 not in seen and p1 != nullid:
                    self.err(lr, _("unknown parent 1 %s of %s") %
                             (short(p1), short(node)), f)
                if p2 not in seen and p2 != nullid:
                    self.err(lr, _("unknown parent 2 %s of %s") %
                             (short(p2), short(node)), f)
            except Exception as inst:
                exc(lr, _("checking parents of %s") % short(node), inst, f)

            if node in seen:
                self.err(lr, _("duplicate revision %d (%d)") %
                         (i, seen[node]), f)
            seen[node] = i
            return lr

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        # revlogv1, havecl and havemf are read by the helpers above through
        # their closures, so they must be bound before the first helper call
        revlogv1 = self.revlogv1
        if ui.verbose or not revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if revlogv1 else 0))

        havecl = self.havecl
        havemf = self.havemf

        ui.status(_("checking changesets\n"))
        seen = {}
        checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)

        ui.status(_("checking manifests\n"))
        seen = {}
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            checklog(mf, "manifest", 0)
        total = len(mf)
        for i in mf:
            ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = checkentry(mf, i, n, seen, mflinkrevs.get(n, []), "manifest")
            if n in mflinkrevs:
                del mflinkrevs[n]
            else:
                self.err(lr, _("%s not in changesets") % short(n), "manifest")

            try:
                for f, fn in mf.readdelta(n).iteritems():
                    if not f:
                        self.err(lr, _("file without name in manifest"))
                    elif f != "/dev/null": # ignore this in very old repos
                        if _validpath(repo, f):
                            filenodes.setdefault(
                                _normpath(f), {}).setdefault(fn, lr)
            except Exception as inst:
                exc(lr, _("reading manifest delta %s") % short(n), inst)
        ui.progress(_('checking'), None)

        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(mflinkrevs) + len(filelinkrevs) + len(filenodes)
        count = 0
        if havemf:
            # whatever is still in mflinkrevs was never found in the manifest
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                count += 1
                if m == nullid:
                    continue
                ui.progress(_('crosschecking'), count, total=total)
                self.err(c, _("changeset refers to unknown manifest %s") %
                         short(m))
            mflinkrevs = None # del is bad here due to scope issues

            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif size > 0 or not revlogv1:
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            # anything left in storefiles after this loop is an orphan
            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            checklog(fl, f, lr)
            seen = {}
            rp = None
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(i):
                        if len(fl.revision(n)) != fl.size(i):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(i)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s") %
                                     (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("%s in manifests not found") % short(node),
                             f)
        ui.progress(_('checking'), None)

        for f in storefiles:
            self.warn(_("warning: orphan revlog '%s'") % f)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (len(files), len(cl), revisions))
        if warnings[0]:
            ui.warn(_("%d warnings encountered!\n") % warnings[0])
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if errors[0]:
            ui.warn(_("%d integrity errors encountered!\n") % errors[0])
            if badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(badrevs))
            return 1
General Comments 0
You need to be logged in to leave comments. Login now