##// END OF EJS Templates
verify: make `err` a private method...
marmoute -
r42030:7eaf4b1a default
parent child Browse files
Show More
@@ -1,456 +1,459 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    Returns whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        checker = verifier(repo)
        outcome = checker.verify()
        return outcome
28 28
29 29 def _normpath(f):
30 30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 31 # converted repo may contain repeated slashes
32 32 while '//' in f:
33 33 f = f.replace('//', '/')
34 34 return f
35 35
class verifier(object):
    """Repository integrity checker.

    Walks the changelog, the manifests and the file logs of a repository,
    reporting problems through ``ui`` and counting them in ``self.errors``
    and ``self.warnings``.
    """

    def __init__(self, repo):
        """Capture the state of ``repo`` needed by the verification passes."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # changelog revisions that at least one error was attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set once a changeset references a non-null manifest (or cannot be
        # read at all)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record an "error" level issue

        ``linkrev`` is the changelog revision the problem is attributed to;
        it is remembered in ``self.badrevs``.  Pass ``None`` when the
        revision is unknown, in which case ``?`` is printed instead.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """record the exception ``inst`` as an "error" level issue

        The exception text is appended to ``msg``; when the exception has an
        empty string form its repr is used instead.
        """
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """check the overall consistency of the storage object ``obj``

        Reports an empty/missing log, size mismatches returned by
        ``obj.checksize()`` and a revlog format that differs from the
        changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        # These two calls must use the private ``_err``; the public ``err``
        # method no longer exists on this class.
        if d[0]:
            self._err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """check one revision (``i``/``node``) of the storage object ``obj``

        Verifies the linkrev against ``linkrevs``, that both parents were
        already seen, and that ``node`` is not a duplicate.  ``seen`` maps
        node -> revision number and is updated in place.  ``f`` is the label
        used in error messages.

        Returns the linkrev, or ``None`` when it cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self._warn(_(" (expected %s)") % " ".join
                           (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(lr, _("unknown parent 1 %s of %s") %
                          (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self._err(lr, _("unknown parent 2 %s of %s") %
                          (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """run every verification pass and print a summary

        Raises ``error.Abort`` when the repository URL is not a ``file:``
        URL.  Returns 1 when integrity errors were recorded, ``None``
        otherwise.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """check every changelog entry

        Returns a ``(mflinkrevs, filelinkrevs)`` pair:

        - ``mflinkrevs`` maps a manifest node to the list of changelog
          revisions referring to it,
        - ``filelinkrevs`` maps a normalized file name (restricted to
          ``self.match``) to the list of changelog revisions touching it.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """check the manifest stored for ``dir`` and, recursively, the
        manifests of the sub-directories it references

        ``mflinkrevs`` maps manifest node -> list of revisions referring to
        it; entries are removed from it as the nodes are found.  ``dir`` is
        empty for the root manifest.  ``storefiles`` and ``subdirprogress``
        are only supplied by the recursive calls: the files backing this
        directory's storage are removed from ``storefiles`` and the progress
        object is advanced by one.

        Returns a mapping ``{path: {filenode: linkrev}}``.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(lr, _("%s not in parent-directory manifest") %
                          short(n), label)
            else:
                self._err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        # 't' flags a sub-directory (tree) entry
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self._err(c, _("parent-directory manifest refers to unknown"
                                   " revision %s") % short(m), label)
                else:
                    self._err(c, _("changeset refers to unknown revision %s") %
                              short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """report files that are listed by changesets but not by manifests,
        and the other way around"""
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # the file log could not be consulted; report the
                        # error without a revision
                        lr = None
                    self._err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """check the file log of every file named by ``filenodes`` or
        ``filelinkrevs``

        File nodes that are found are removed from ``filenodes``; whatever
        remains for a file is reported as an unknown revision.

        Returns ``(number of files, number of file revisions checked)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(_(" warning: revlog '%s' not in fncache!") %
                                   ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(linkrev if linkrev is not None else lr,
                                  problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(_("warning: copy source of '%s' not"
                                             " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(lr,
                                      _("empty or missing copy source revlog "
                                        "%s:%s") % (rp[0],
                                                    short(rp[1])),
                                      f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # the lookup itself is the check; a failure is
                            # reported by the handler below
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self._err(lr, _("manifest refers to unknown revision %s") %
                              short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now