##// END OF EJS Templates
verify: make the `exc` method private...
marmoute -
r42032:cfe08588 default
parent child Browse files
Show More
@@ -1,460 +1,461 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    The lock is taken via ``repo.lock()`` for the whole duration of the
    check. The return value is whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
28 28
29 29 def _normpath(f):
30 30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 31 # converted repo may contain repeated slashes
32 32 while '//' in f:
33 33 f = f.replace('//', '/')
34 34 return f
35 35
class verifier(object):
    """Integrity checker for a repository's changelog, manifests and files.

    An instance accumulates the number of errors and warnings it reports
    through ``ui.warn`` and the set of changelog revisions it considers
    damaged. ``verify()`` is the entry point; the other methods implement
    the individual checking phases and shared reporting helpers.
    """

    def __init__(self, repo):
        """Collect the repository state the checks rely on."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # linkrevs passed to _err(); used to report the first damaged one
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set once a changeset with a non-null manifest (or an unreadable
        # changeset) has been seen
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue

        ``linkrev`` is added to ``self.badrevs`` when it is not None and is
        rendered as ``?`` otherwise. The emitted line has the form
        ``[filename@]linkrev: msg``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process

        The exception ``inst`` is rendered with ``pycompat.bytestr`` and,
        when that yields an empty value, with ``pycompat.byterepr``. The
        result is reported as an error through ``_err``.
        """
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """Check store-level properties of the revlog-like object ``obj``.

        - reports an error (attached to ``linkrev``) and stops when ``obj``
          is empty although the repository has changesets or manifests;
        - reports an error for each non-zero entry of ``obj.checksize()``;
        - warns when the format version of ``obj`` disagrees with the one
          recorded for the changelog in ``self.revlogv1``.

        ``name`` is the label used in the messages.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        # Size problems are reported through _err(): it is the only error
        # reporting method this class defines.
        if d[0]:
            self._err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of ``obj`` and return its linkrev.

        ``linkrevs`` lists the changelog revisions expected to introduce
        this node, ``seen`` maps already visited nodes to their revision
        number (it is updated in place) and ``f`` is the label used in
        messages.

        Returns the linkrev stored for the revision, or None when that
        linkrev is negative or (if the repository has changesets) is not
        one of the expected ``linkrevs``.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                self._warn(_(" (expected %s)") %
                           " ".join(map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(lr, _("unknown parent 1 %s of %s") %
                          (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self._err(lr, _("unknown parent 2 %s of %s") %
                          (short(p2), short(node)), f)
        except Exception as inst:
            self._exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification phase and print a summary.

        Raises ``error.Abort`` when the repository URL does not start with
        ``file:``. Returns 1 when at least one error was recorded;
        otherwise the method falls off the end and returns None.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check the changelog and gather what it refers to.

        Returns a ``(mflinkrevs, filelinkrevs)`` pair of dicts:

        - ``mflinkrevs`` maps the first field of each changeset entry (used
          as the manifest node by the later phases) to the list of
          changelog revisions carrying it; null ids are skipped;
        - ``filelinkrevs`` maps each normalized path from the fourth field
          of the entry, when accepted by ``self.match``, to the list of
          changelog revisions listing it.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may have referred to a manifest
                self.refersmf = True
                self._exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """Check the manifest storage for ``dir`` and recurse into subdirs.

        ``mflinkrevs`` maps expected manifest nodes to the linkrevs
        referring to them; entries are removed from it as the matching
        revisions are found. ``storefiles`` and ``subdirprogress`` are only
        used by the recursive calls made for sub-directories: the root call
        (empty ``dir``) builds them itself.

        Returns a dict mapping each file path to a ``{filenode: linkrev}``
        dict, merged across all visited directories.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            # files backing this directory's storage are accounted for
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(lr, _("%s not in parent-directory manifest") %
                          short(n), label)
            else:
                self._err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        # flag 't' entries are collected as sub-directories
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self._err(c, _("parent-directory manifest refers to unknown"
                                   " revision %s") % short(m), label)
                else:
                    self._err(c, _("changeset refers to unknown revision %s") %
                              short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                # store files no visited directory claimed
                for f in sorted(storefiles):
                    self._warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to only one of changesets and manifests.

        ``filelinkrevs`` is keyed by the paths found in changesets and
        ``filenodes`` by the paths found in manifests. Each direction is
        only checked when the repository has manifests, respectively
        changesets.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # no usable linkrev: report against "?"
                        lr = None
                    self._err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check the storage of every file named by manifests or changesets.

        ``filenodes`` maps paths to ``{filenode: linkrev}`` dicts and is
        consumed in place: nodes found in storage are deleted so that the
        remainder can be reported as unknown revisions. ``filelinkrevs``
        maps paths to the changelog revisions touching them.

        Returns ``(number of files checked, number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(_(" warning: revlog '%s' not in fncache!") %
                                   ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(linkrev if linkrev is not None else lr,
                                  problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(_("warning: copy source of '%s' not"
                                             " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(lr,
                                      _("empty or missing copy source revlog "
                                        "%s:%s") % (rp[0],
                                                    short(rp[1])),
                                      f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # lookup only; a failure is reported by the
                            # except clause below
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(lr, _("checking rename of %s") % short(n),
                              inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self._err(lr, _("manifest refers to unknown revision %s") %
                              short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now