##// END OF EJS Templates
verify: add some inline documentation to the top level `verify` method...
marmoute -
r42035:4da2261e default
parent child Browse files
Show More
@@ -1,467 +1,465
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """check the integrity of ``repo`` while holding the repository lock

    Returns the value produced by ``verifier(repo).verify()``.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
28 28
29 29 def _normpath(f):
30 30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 31 # converted repo may contain repeated slashes
32 32 while '//' in f:
33 33 f = f.replace('//', '/')
34 34 return f
35 35
36 36 class verifier(object):
def __init__(self, repo):
    """remember the repository and reset all verification bookkeeping"""
    ui = repo.ui

    # objects the checks operate on
    self.repo = repo.unfiltered()
    self.ui = ui
    self.match = repo.narrowmatch()

    # issue counters and the set of linkrevs known to be damaged
    self.badrevs = set()
    self.errors = 0
    self.warnings = 0

    # facts about the store, gathered once up front
    self.havecl = len(repo.changelog) > 0
    self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
    self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
    self.lrugetctx = util.lrucachefunc(repo.__getitem__)

    # flags updated while the verification runs
    self.refersmf = False
    self.fncachewarned = False

    # developer config: verify.skipflags
    self.skipflags = ui.configint('verify', 'skipflags')
    self.warnorphanstorefiles = True
53 53
54 54 def _warn(self, msg):
55 55 """record a "warning" level issue"""
56 56 self.ui.warn(msg + "\n")
57 57 self.warnings += 1
58 58
59 59 def _err(self, linkrev, msg, filename=None):
60 60 """record a "error" level issue"""
61 61 if linkrev is not None:
62 62 self.badrevs.add(linkrev)
63 63 linkrev = "%d" % linkrev
64 64 else:
65 65 linkrev = '?'
66 66 msg = "%s: %s" % (linkrev, msg)
67 67 if filename:
68 68 msg = "%s@%s" % (filename, msg)
69 69 self.ui.warn(" " + msg + "\n")
70 70 self.errors += 1
71 71
def _exc(self, linkrev, msg, inst, filename=None):
    """report the exception ``inst`` as an integrity error

    The exception text is appended to ``msg``; when the exception has no
    text, its repr is used instead.
    """
    detail = pycompat.bytestr(inst) or pycompat.byterepr(inst)
    report = "%s: %s" % (msg, detail)
    self._err(linkrev, report, filename)
78 78
def checklog(self, obj, name, linkrev):
    """run the generic sanity checks on the storage ``obj``

    ``name`` is the label used in the messages and ``linkrev`` the revision
    blamed when the storage turns out to be empty or missing.

    Three things are checked: that the storage is not empty while the
    repository has changelog or manifest data, that ``obj.checksize()``
    reports no stray bytes, and that the revlog format of ``obj`` agrees
    with the format of the changelog.
    """
    if not len(obj) and (self.havecl or self.havemf):
        self._err(linkrev, _("empty or missing %s") % name)
        return

    # checksize() yields two byte counts: the data size mismatch and the
    # number of extra bytes found in the index (per the messages below).
    d = obj.checksize()
    # These two reports must go through ``_err``: the class defines no
    # ``err`` method, so calling it raised AttributeError instead of
    # recording the integrity error.
    if d[0]:
        self._err(None, _("data length off by %d bytes") % d[0], name)
    if d[1]:
        self._err(None, _("index contains %d extra bytes") % d[1], name)

    # warn when this storage and the changelog disagree on revlog format
    if obj.version != revlog.REVLOGV0:
        if not self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 1") % name)
    elif self.revlogv1:
        self._warn(_("warning: `%s' uses revlog format 0") % name)
95 95
def checkentry(self, obj, i, node, seen, linkrevs, f):
    """check revision ``i`` (``node``) of storage ``obj`` and return its linkrev

    ``linkrevs`` lists the changelog revisions this entry may link to,
    ``seen`` maps the nodes already visited to their revision number (it is
    updated with this entry) and ``f`` is the label used in messages.

    The linkrev, both parents and duplicate nodes are checked. None is
    returned instead of the linkrev when the stored value cannot be trusted.
    """
    linkrev = obj.linkrev(obj.rev(node))
    suspicious = linkrev < 0 or (self.havecl and linkrev not in linkrevs)
    if suspicious:
        if linkrev < 0 or linkrev >= len(self.repo.changelog):
            template = _("rev %d points to nonexistent changeset %d")
        else:
            template = _("rev %d points to unexpected changeset %d")
        self._err(None, template % (i, linkrev), f)
        if linkrevs:
            if f and len(linkrevs) > 1:
                try:
                    # attempt to filter down to real linkrevs
                    linkrevs = [
                        candidate for candidate in linkrevs
                        if self.lrugetctx(candidate)[f].filenode() == node
                    ]
                except Exception:
                    pass
            hint = _(" (expected %s)")
            self._warn(hint % " ".join(map(pycompat.bytestr, linkrevs)))
        linkrev = None # can't be trusted

    try:
        p1, p2 = obj.parents(node)
        if p1 != nullid and p1 not in seen:
            self._err(linkrev, _("unknown parent 1 %s of %s") %
                      (short(p1), short(node)), f)
        if p2 != nullid and p2 not in seen:
            self._err(linkrev, _("unknown parent 2 %s of %s") %
                      (short(p2), short(node)), f)
    except Exception as inst:
        self._exc(linkrev, _("checking parents of %s") % short(node),
                  inst, f)

    if node in seen:
        previous = seen[node]
        self._err(linkrev, _("duplicate revision %d (%d)") % (i, previous),
                  f)
    seen[node] = i
    return linkrev
131 131
def verify(self):
    """verify the content of the Mercurial repository

    This method runs all verifications, displaying issues as they are
    found.

    Returns 1 if any error has been encountered, 0 otherwise."""
    # initial validation and generic report
    repo = self.repo
    ui = repo.ui

    if not repo.url().startswith('file:'):
        raise error.Abort(_("cannot verify bundle or remote repos"))

    journal = repo.sjoin("journal")
    if os.path.exists(journal):
        ui.warn(_("abandoned transaction found - run hg recover\n"))

    if ui.verbose or not self.revlogv1:
        formatversion = 1 if self.revlogv1 else 0
        ui.status(_("repository uses revlog format %d\n") % formatversion)

    # data verification
    mflinkrevs, filelinkrevs = self._verifychangelog()
    filenodes = self._verifymanifest(mflinkrevs)
    del mflinkrevs
    self._crosscheckfiles(filelinkrevs, filenodes)
    totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

    # final report
    summary = _("checked %d changesets with %d changes to %d files\n")
    ui.status(summary % (len(repo.changelog), filerevisions, totalfiles))
    if self.warnings:
        ui.warn(_("%d warnings encountered!\n") % self.warnings)
    if self.fncachewarned:
        ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                  'corrupt fncache\n'))
    if not self.errors:
        return 0

    ui.warn(_("%d integrity errors encountered!\n") % self.errors)
    if self.badrevs:
        firstbad = min(self.badrevs)
        ui.warn(_("(first damaged changeset appears to be %d)\n")
                % firstbad)
    return 1
175 173
def _verifychangelog(self):
    """check every changelog entry and gather what the changesets refer to

    Returns a ``(mflinkrevs, filelinkrevs)`` pair. ``mflinkrevs`` maps each
    non-null manifest node to the list of changelog revisions using it;
    ``filelinkrevs`` maps each matched (normalized) file path to the list
    of changelog revisions touching it.
    """
    ui = self.ui
    repo = self.repo
    matcher = self.match
    changelog = repo.changelog

    ui.status(_("checking changesets\n"))
    mflinkrevs = {}
    filelinkrevs = {}
    seen = {}
    self.checklog(changelog, "changelog", 0)
    progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                               total=len(repo))
    for rev in repo:
        progress.update(rev)
        node = changelog.node(rev)
        self.checkentry(changelog, rev, node, seen, [rev], "changelog")

        try:
            changes = changelog.read(node)
            manifestnode = changes[0]
            if manifestnode != nullid:
                mflinkrevs.setdefault(manifestnode, []).append(rev)
                self.refersmf = True
            for path in changes[3]:
                if not matcher(path):
                    continue
                filelinkrevs.setdefault(_normpath(path), []).append(rev)
        except Exception as inst:
            # an unreadable changeset may still refer to a manifest
            self.refersmf = True
            self._exc(rev, _("unpacking changeset %s") % short(node), inst)
    progress.complete()
    return mflinkrevs, filelinkrevs
207 205
def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                    subdirprogress=None):
    """verify the manifest storage of ``dir`` and collect its file nodes

    ``mflinkrevs`` maps manifest nodes to the list of linkrevs expected for
    them. Entries are deleted from it as the matching revisions are found,
    so the caller's dict is modified.

    ``dir`` selects the directory whose manifest is checked; the empty
    string selects the top-level manifest.

    ``storefiles`` and ``subdirprogress`` are supplied by the recursive
    calls this method makes for directory manifests: the set of not yet
    accounted 'meta/' store files and the shared progress object.

    Returns a dict mapping each matched file path to a
    ``{filenode: linkrev}`` dict, including the entries gathered from the
    sub-directories.
    """
    repo = self.repo
    ui = self.ui
    match = self.match
    mfl = self.repo.manifestlog
    mf = mfl.getstorage(dir)

    if not dir:
        self.ui.status(_("checking manifests\n"))

    filenodes = {}
    subdirnodes = {}
    seen = {}
    label = "manifest"
    if dir:
        label = dir
        # the files backing this directory manifest are accounted for
        revlogfiles = mf.files()
        storefiles.difference_update(revlogfiles)
        if subdirprogress: # should be true since we're in a subdirectory
            subdirprogress.increment()
    if self.refersmf:
        # Do not check manifest if there are only changelog entries with
        # null manifests.
        self.checklog(mf, label, 0)
    progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                               total=len(mf))
    for i in mf:
        # per-revision progress is only reported for the top-level manifest
        if not dir:
            progress.update(i)
        n = mf.node(i)
        lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
        if n in mflinkrevs:
            # referenced as expected; whatever is left in mflinkrevs after
            # the loop is reported as unknown below
            del mflinkrevs[n]
        elif dir:
            self._err(lr, _("%s not in parent-directory manifest") %
                      short(n), label)
        else:
            self._err(lr, _("%s not in changesets") % short(n), label)

        try:
            mfdelta = mfl.get(dir, n).readdelta(shallow=True)
            for f, fn, fl in mfdelta.iterentries():
                if not f:
                    self._err(lr, _("entry without name in manifest"))
                elif f == "/dev/null": # ignore this in very old repos
                    continue
                fullpath = dir + _normpath(f)
                if fl == 't':
                    # entries flagged 't' are handled as sub-directory
                    # manifests and verified by the recursion below
                    if not match.visitdir(fullpath):
                        continue
                    subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                        fn, []).append(lr)
                else:
                    if not match(fullpath):
                        continue
                    # only the first linkrev seen for a file node is kept
                    filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
        except Exception as inst:
            self._exc(lr, _("reading delta %s") % short(n), inst, label)
    if not dir:
        progress.complete()

    if self.havemf:
        # every node still in mflinkrevs was referenced but never found
        for c, m in sorted([(c, m) for m in mflinkrevs
                            for c in mflinkrevs[m]]):
            if dir:
                self._err(c, _("parent-directory manifest refers to unknown"
                               " revision %s") % short(m), label)
            else:
                self._err(c, _("changeset refers to unknown revision %s") %
                          short(m), label)

    if not dir and subdirnodes:
        self.ui.status(_("checking directory manifests\n"))
        # collect the 'meta/' store files so that the recursive calls can
        # tick off the ones they own
        storefiles = set()
        subdirs = set()
        revlogv1 = self.revlogv1
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                storefiles.add(_normpath(f))
                subdirs.add(os.path.dirname(f))
        subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                         total=len(subdirs))

    # recurse into each sub-directory and merge the file nodes it reports
    for subdir, linkrevs in subdirnodes.iteritems():
        subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                               subdirprogress)
        for f, onefilenodes in subdirfilenodes.iteritems():
            filenodes.setdefault(f, {}).update(onefilenodes)

    if not dir and subdirnodes:
        subdirprogress.complete()
        # store files no directory manifest claimed are reported as orphans
        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

    return filenodes
307 305
def _crosscheckfiles(self, filelinkrevs, filenodes):
    """report the files known to changesets or manifests, but not to both

    ``filelinkrevs`` maps the files named by changesets to their linkrevs;
    ``filenodes`` maps the files named by manifests to their file nodes.
    """
    repo = self.repo
    ui = self.ui
    ui.status(_("crosschecking files in changesets and manifests\n"))

    progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                               total=len(filelinkrevs) + len(filenodes))

    if self.havemf:
        for path in sorted(filelinkrevs):
            progress.increment()
            if path in filenodes:
                continue
            firstrev = filelinkrevs[path][0]
            self._err(firstrev, _("in changeset but not in manifest"), path)

    if self.havecl:
        for path in sorted(filenodes):
            progress.increment()
            if path in filelinkrevs:
                continue
            try:
                filelog = repo.file(path)
                firstrev = min(filelog.linkrev(filelog.rev(node))
                               for node in filenodes[path])
            except Exception:
                firstrev = None
            self._err(firstrev, _("in manifest but not in changeset"), path)

    progress.complete()
335 333
def _verifyfiles(self, filenodes, filelinkrevs):
    """verify the storage of every file named by manifests or changesets

    ``filenodes`` maps file paths to ``{filenode: linkrev}`` dicts; the
    inner entries are deleted as the matching revisions are found, so the
    caller's data is modified. ``filelinkrevs`` maps file paths to the list
    of changelog revisions touching them.

    Returns a ``(number of files, number of file revisions)`` pair.
    """
    repo = self.repo
    ui = self.ui
    lrugetctx = self.lrugetctx
    revlogv1 = self.revlogv1
    havemf = self.havemf
    ui.status(_("checking files\n"))

    # gather the 'data/' store files; the ones still in the set at the end
    # are reported as orphans
    storefiles = set()
    for f, f2, size in repo.store.datafiles():
        if not f:
            self._err(None, _("cannot decode filename '%s'") % f2)
        elif (size > 0 or not revlogv1) and f.startswith('data/'):
            storefiles.add(_normpath(f))

    # shared with each file storage through verifyintegrity()
    state = {
        # TODO this assumes revlog storage for changelog.
        'expectedversion': self.repo.changelog.version & 0xFFFF,
        'skipflags': self.skipflags,
        # experimental config: censor.policy
        'erroroncensored': ui.config('censor', 'policy') == 'abort',
    }

    files = sorted(set(filenodes) | set(filelinkrevs))
    revisions = 0
    progress = ui.makeprogress(_('checking'), unit=_('files'),
                               total=len(files))
    for i, f in enumerate(files):
        progress.update(i, item=f)
        try:
            linkrevs = filelinkrevs[f]
        except KeyError:
            # in manifest but not in changelog
            linkrevs = []

        # revision blamed for file-level problems
        if linkrevs:
            lr = linkrevs[0]
        else:
            lr = None

        try:
            fl = repo.file(f)
        except error.StorageError as e:
            self._err(lr, _("broken revlog! (%s)") % e, f)
            continue

        # tick off the store files backing this file
        for ff in fl.files():
            try:
                storefiles.remove(ff)
            except KeyError:
                if self.warnorphanstorefiles:
                    self._warn(_(" warning: revlog '%s' not in fncache!") %
                               ff)
                    self.fncachewarned = True

        if not len(fl) and (self.havecl or self.havemf):
            self._err(lr, _("empty or missing %s") % f)
        else:
            # Guard against implementations not setting this.
            state['skipread'] = set()
            for problem in fl.verifyintegrity(state):
                if problem.node is not None:
                    linkrev = fl.linkrev(fl.rev(problem.node))
                else:
                    linkrev = None

                if problem.warning:
                    self._warn(problem.warning)
                elif problem.error:
                    # fall back to the file-level linkrev when the problem
                    # is not tied to a node
                    self._err(linkrev if linkrev is not None else lr,
                              problem.error, f)
                else:
                    raise error.ProgrammingError(
                        'problem instance does not set warning or error '
                        'attribute: %s' % problem.msg)

        seen = {}
        # NOTE: ``i`` and ``lr`` are rebound from here on; the enumerate()
        # above assigns ``i`` again at the next file
        for i in fl:
            revisions += 1
            n = fl.node(i)
            lr = self.checkentry(fl, i, n, seen, linkrevs, f)
            if f in filenodes:
                if havemf and n not in filenodes[f]:
                    self._err(lr, _("%s not in manifests") % (short(n)), f)
                else:
                    # accounted for; leftovers are reported after the loop
                    del filenodes[f][n]

            # nodes placed in state['skipread'] are not read again
            if n in state['skipread']:
                continue

            # check renames
            try:
                # This requires resolving fulltext (at least on revlogs). We
                # may want ``verifyintegrity()`` to pass a set of nodes with
                # rename metadata as an optimization.
                rp = fl.renamed(n)
                if rp:
                    if lr is not None and ui.verbose:
                        ctx = lrugetctx(lr)
                        if not any(rp[0] in pctx for pctx in ctx.parents()):
                            self._warn(_("warning: copy source of '%s' not"
                                         " in parents of %s") % (f, ctx))
                    fl2 = repo.file(rp[0])
                    if not len(fl2):
                        self._err(lr,
                                  _("empty or missing copy source revlog "
                                    "%s:%s") % (rp[0],
                                                short(rp[1])),
                                  f)
                    elif rp[1] == nullid:
                        ui.note(_("warning: %s@%s: copy source"
                                  " revision is nullid %s:%s\n")
                                % (f, lr, rp[0], short(rp[1])))
                    else:
                        # result unused; presumably raises when the copy
                        # source revision is unknown, which the except
                        # clause below reports -- TODO confirm
                        fl2.rev(rp[1])
            except Exception as inst:
                self._exc(lr, _("checking rename of %s") % short(n),
                          inst, f)

        # cross-check
        if f in filenodes:
            # nodes still listed were named by a manifest but never found
            fns = [(v, k) for k, v in filenodes[f].iteritems()]
            for lr, node in sorted(fns):
                self._err(lr, _("manifest refers to unknown revision %s") %
                          short(node), f)
    progress.complete()

    if self.warnorphanstorefiles:
        for f in sorted(storefiles):
            self._warn(_("warning: orphan data file '%s'") % f)

    return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now