##// END OF EJS Templates
verify: document the `checklog` method...
marmoute -
r42039:08d97745 default
parent child Browse files
Show More
@@ -1,484 +1,491 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    Returns whatever ``verifier.verify()`` returns for this repository.
    """
    with repo.lock():
        checker = verifier(repo)
        status = checker.verify()
    return status
28 28
def _normpath(f):
    """Return ``f`` with every run of repeated slashes collapsed to one.

    Under hg < 2.4, convert didn't sanitize paths properly, so a converted
    repo may contain repeated slashes.
    """
    doubled, single = '//', '/'
    path = f
    while True:
        if doubled not in path:
            return path
        # one pass halves each run of slashes; loop until none are doubled
        path = path.replace(doubled, single)
35 35
class verifier(object):
    """Run integrity checks over a repository and report what is wrong.

    Issues are printed through the repository ``ui`` as they are found and
    counted in ``self.errors`` / ``self.warnings``.  ``verify()`` is the
    entry point; the other methods implement the individual phases.
    """

    def __init__(self, repo):
        """prepare the verification state for ``repo``

        The unfiltered view of ``repo`` is stored and used for the checks.
        """
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # changelog revisions that errors have been attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set once a changeset referencing a non-null manifest is seen (or a
        # changeset could not be unpacked)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record an "error" level issue

        The message is printed as ``[filename@]linkrev: msg`` where
        ``linkrev`` is shown as ``?`` when it is None.  A non-None ``linkrev``
        is also remembered in ``self.badrevs``.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record an exception raised during the verify process

        The exception ``inst`` is rendered with ``pycompat.bytestr`` (falling
        back to ``pycompat.byterepr`` when that is empty) and reported as an
        error through ``_err``.
        """
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """verify high level property of a revlog

        - revlog is present,
        - revlog is non-empty,
        - sizes (index and data) are correct,
        - revlog's format version is correct.

        arguments are:
        - obj: the revlog to check
        - name: label used in the reported messages
        - linkrev: changelog revision blamed when the revlog is empty or
          missing (size problems are reported without a linkrev)
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        # NOTE: these must go through ``_err``; the class has no ``err``
        # method, so calling it would raise AttributeError instead of
        # recording the size mismatch.
        if d[0]:
            self._err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 0") % name)

    def _checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self._warn(_(" (expected %s)") % " ".join
                           (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(lr, _("unknown parent 1 %s of %s") %
                          (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self._err(lr, _("unknown parent 2 %s of %s") %
                          (short(p2), short(node)), f)
        except Exception as inst:
            self._exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method runs all verifications, displaying issues as they are
        found.

        Return 1 if any error has been encountered, 0 otherwise.

        Raise error.Abort when the repository URL does not start with
        "file:".
        """
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1
        return 0

    def _verifychangelog(self):
        """verify the changelog of the repo

        Every changelog revision goes through ``_checkentry`` and is then
        read; a failure to read it is reported as an error.

        Return a 2-tuple ``(mflinkrevs, filelinkrevs)``:
        - mflinkrevs:   {node: [changelog-revisions]} keyed by the non-null
                        first field of each changelog entry (used by
                        ``verify`` as the manifest linkrevs),
        - filelinkrevs: {normalized-path: [changelog-revisions]} for the
                        files listed in each entry that ``self.match``
                        accepts.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self._checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # the entry could not be read, so we cannot tell whether it
                # referenced a manifest: keep the manifest checks enabled
                self.refersmf = True
                self._exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """verify the manifest storage for ``dir`` and, recursively, the
        directory manifests it references

        arguments are:
        - mflinkrevs:     {node: [linkrevs]} of the manifest nodes expected
                          in this storage; entries are removed as the nodes
                          are found,
        - dir:            directory of the manifest storage to check ("" for
                          the top-level manifest),
        - storefiles:     for a sub-directory, set of store files not yet
                          accounted for; the files of this storage are
                          removed from it,
        - subdirprogress: for a sub-directory, progress object incremented
                          once per call.

        Return ``filenodes``: {path: {filenode: linkrev}} for the file
        entries accepted by ``self.match``, merged with the result of the
        recursive calls.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self._checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(lr, _("%s not in parent-directory manifest") %
                          short(n), label)
            else:
                self._err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null":  # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        # 't' entries are collected as sub-directory
                        # manifests and verified by the recursive call below
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self._err(c, _("parent-directory manifest refers to unknown"
                                   " revision %s") % short(m), label)
                else:
                    self._err(c, _("changeset refers to unknown revision %s") %
                              short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """check that files seen in changesets and in manifests agree

        arguments are:
        - filelinkrevs: {path: [changelog-revisions]} from the changelog,
        - filenodes:    {path: {filenode: linkrev}} from the manifests.

        An error is recorded for every path present on one side only; the
        "in changeset but not in manifest" direction is only checked when
        ``self.havemf`` is set, the other one only when ``self.havecl`` is.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # no usable linkrev: report without one
                        lr = None
                    self._err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """verify the file storage of every file known to the repository

        arguments are:
        - filenodes:    {path: {filenode: linkrev}} from the manifests;
                        nodes found in storage are removed from it,
        - filelinkrevs: {path: [changelog-revisions]} from the changelog.

        Return a 2-tuple ``(number of files checked, number of file
        revisions checked)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(_(" warning: revlog '%s' not in fncache!") %
                                   ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(linkrev if linkrev is not None else lr,
                                  problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self._checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(_("warning: copy source of '%s' not"
                                             " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(lr,
                                      _("empty or missing copy source revlog "
                                        "%s:%s") % (rp[0],
                                                    short(rp[1])),
                                      f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # only called for its failure: an unknown copy
                            # source node is reported by the handler below
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(lr, _("checking rename of %s") % short(n),
                              inst, f)

            # cross-check
            if f in filenodes:
                # nodes still listed were referenced by a manifest but never
                # found in the file storage
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self._err(lr, _("manifest refers to unknown revision %s") %
                              short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now