##// END OF EJS Templates
verify: document the `checkentry` method...
marmoute -
r42036:00c9fde7 default
parent child Browse files
Show More
@@ -1,465 +1,484
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 pycompat,
21 21 revlog,
22 22 util,
23 23 )
24 24
def verify(repo):
    """Check the integrity of ``repo`` while holding its lock.

    Returns whatever ``verifier(repo).verify()`` returns: 1 if any error
    has been encountered, 0 otherwise.
    """
    with repo.lock():
        return verifier(repo).verify()
28 28
29 29 def _normpath(f):
30 30 # under hg < 2.4, convert didn't sanitize paths properly, so a
31 31 # converted repo may contain repeated slashes
32 32 while '//' in f:
33 33 f = f.replace('//', '/')
34 34 return f
35 35
class verifier(object):
    """Run the integrity checks of a repository and tally the issues found.

    Issues are reported through ``ui.warn`` as they are discovered and are
    counted in ``self.errors`` / ``self.warnings``.  Changelog revisions
    associated with an error are collected in ``self.badrevs``.
    """

    def __init__(self, repo):
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        self.match = repo.narrowmatch()
        # changelog revisions for which at least one error was reported
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifestlog.getstorage(b'')) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.__getitem__)
        # set once a changeset referencing a non-null manifest is seen (or
        # a changeset cannot be read at all)
        self.refersmf = False
        self.fncachewarned = False
        # developer config: verify.skipflags
        self.skipflags = repo.ui.configint('verify', 'skipflags')
        self.warnorphanstorefiles = True

    def _warn(self, msg):
        """record a "warning" level issue"""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def _err(self, linkrev, msg, filename=None):
        """record a "error" level issue"""
        if linkrev is not None:
            self.badrevs.add(linkrev)
            linkrev = "%d" % linkrev
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def _exc(self, linkrev, msg, inst, filename=None):
        """record exception raised during the verify process"""
        fmsg = pycompat.bytestr(inst)
        if not fmsg:
            fmsg = pycompat.byterepr(inst)
        self._err(linkrev, "%s: %s" % (msg, fmsg), filename)

    def checklog(self, obj, name, linkrev):
        """verify high level properties of a single revlog

        arguments are:
        - obj:     the revlog to check
        - name:    label used in the reported messages
        - linkrev: revision blamed if the revlog is empty or missing

        Performs the following checks:
        - the revlog is not empty while the repository has changesets or
          manifests,
        - ``obj.checksize()`` reports no data or index size discrepancy,
        - the revlog format (0 vs. later) agrees with the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self._err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        # These two calls used to target a nonexistent ``self.err``, which
        # raised AttributeError instead of recording the problem.
        if d[0]:
            self._err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self._err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self._warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self._warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """verify a single revlog entry

        arguments are:
        - obj:      the source revlog
        - i:        the revision number
        - node:     the revision node id
        - seen:     nodes previously seen for this revlog
        - linkrevs: [changelog-revisions] introducing "node"
        - f:        string label ("changelog", "manifest", or filename)

        Performs the following checks:
        - linkrev points to an existing changelog revision,
        - linkrev points to a changelog revision that introduces this revision,
        - linkrev points to the lowest of these changesets,
        - both parents exist in the revlog,
        - the revision is not duplicated.

        Return the linkrev of the revision (or None for changelog's revisions).
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self._err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered list
                        pass
                self._warn(_(" (expected %s)") % " ".join
                           (map(pycompat.bytestr, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self._err(lr, _("unknown parent 1 %s of %s") %
                          (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self._err(lr, _("unknown parent 2 %s of %s") %
                          (short(p2), short(node)), f)
        except Exception as inst:
            self._exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self._err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """verify the content of the Mercurial repository

        This method run all verifications, displaying issues as they are found.

        return 1 if any error have been encountered, 0 otherwise."""
        # initial validation and generic report
        repo = self.repo
        ui = repo.ui
        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (self.revlogv1 and 1 or 0))

        # data verification
        mflinkrevs, filelinkrevs = self._verifychangelog()
        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs
        self._crosscheckfiles(filelinkrevs, filenodes)
        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        # final report
        ui.status(_("checked %d changesets with %d changes to %d files\n") %
                  (len(repo.changelog), filerevisions, totalfiles))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1
        return 0

    def _verifychangelog(self):
        """check every changelog entry and collect what it references

        Return a ``(mflinkrevs, filelinkrevs)`` pair:
        - mflinkrevs:   {manifest-node: [changelog-revisions]}
        - filelinkrevs: {normalized-filename: [changelog-revisions]},
          restricted to the files accepted by ``self.match``.
        """
        ui = self.ui
        repo = self.repo
        match = self.match
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        progress = ui.makeprogress(_('checking'), unit=_('changesets'),
                                   total=len(repo))
        for i in repo:
            progress.update(i)
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if match(f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                self.refersmf = True
                self._exc(i, _("unpacking changeset %s") % short(n), inst)
        progress.complete()
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        subdirprogress=None):
        """check the manifest stored for ``dir`` and, recursively, the
        directory manifests it references

        arguments are:
        - mflinkrevs:     {manifest-node: [changelog-revisions]}; entries
                          are deleted from it as the matching manifest
                          revisions are found
        - dir:            directory of the manifest ("" for the root one)
        - storefiles:     set of store files not yet accounted for; only
                          used (and updated in place) when ``dir`` is set
        - subdirprogress: progress object incremented once per directory
                          manifest

        Return {filename: {filenode: linkrev}} for the files accepted by
        ``self.match``.
        """
        repo = self.repo
        ui = self.ui
        match = self.match
        mfl = self.repo.manifestlog
        mf = mfl.getstorage(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if subdirprogress: # should be true since we're in a subdirectory
                subdirprogress.increment()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        progress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                   total=len(mf))
        for i in mf:
            if not dir:
                progress.update(i)
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self._err(lr, _("%s not in parent-directory manifest") %
                          short(n), label)
            else:
                self._err(lr, _("%s not in changesets") % short(n), label)

            try:
                mfdelta = mfl.get(dir, n).readdelta(shallow=True)
                for f, fn, fl in mfdelta.iterentries():
                    if not f:
                        self._err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if fl == 't':
                        if not match.visitdir(fullpath):
                            continue
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        if not match(fullpath):
                            continue
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self._exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            progress.complete()

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self._err(c, _("parent-directory manifest refers to unknown"
                                   " revision %s") % short(m), label)
                else:
                    self._err(c, _("changeset refers to unknown revision %s") %
                              short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self._err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdirprogress = ui.makeprogress(_('checking'), unit=_('manifests'),
                                             total=len(subdirs))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   subdirprogress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            subdirprogress.complete()
            if self.warnorphanstorefiles:
                for f in sorted(storefiles):
                    self._warn(_("warning: orphan data file '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """report files present in changesets but not in manifests, and
        files present in manifests but not in changesets

        arguments are:
        - filelinkrevs: {filename: [changelog-revisions]}
        - filenodes:    {filename: {filenode: linkrev}}
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        progress = ui.makeprogress(_('crosschecking'), unit=_('files'),
                                   total=total)
        if self.havemf:
            for f in sorted(filelinkrevs):
                progress.increment()
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self._err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                progress.increment()
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # no usable linkrev; the error is reported with "?"
                        lr = None
                    self._err(lr, _("in manifest but not in changeset"), f)

        progress.complete()

    def _verifyfiles(self, filenodes, filelinkrevs):
        """check the storage of every file found in manifests or changesets

        arguments are:
        - filenodes:    {filename: {filenode: linkrev}}; nodes found in
                        the file storage are deleted from it
        - filelinkrevs: {filename: [changelog-revisions]}

        Return a ``(number-of-files, number-of-file-revisions)`` pair.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self._err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        state = {
            # TODO this assumes revlog storage for changelog.
            'expectedversion': self.repo.changelog.version & 0xFFFF,
            'skipflags': self.skipflags,
            # experimental config: censor.policy
            'erroroncensored': ui.config('censor', 'policy') == 'abort',
        }

        files = sorted(set(filenodes) | set(filelinkrevs))
        revisions = 0
        progress = ui.makeprogress(_('checking'), unit=_('files'),
                                   total=len(files))
        for i, f in enumerate(files):
            progress.update(i, item=f)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.StorageError as e:
                self._err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    if self.warnorphanstorefiles:
                        self._warn(_(" warning: revlog '%s' not in fncache!") %
                                   ff)
                        self.fncachewarned = True

            if not len(fl) and (self.havecl or self.havemf):
                self._err(lr, _("empty or missing %s") % f)
            else:
                # Guard against implementations not setting this.
                state['skipread'] = set()
                for problem in fl.verifyintegrity(state):
                    if problem.node is not None:
                        linkrev = fl.linkrev(fl.rev(problem.node))
                    else:
                        linkrev = None

                    if problem.warning:
                        self._warn(problem.warning)
                    elif problem.error:
                        self._err(linkrev if linkrev is not None else lr,
                                  problem.error, f)
                    else:
                        raise error.ProgrammingError(
                            'problem instance does not set warning or error '
                            'attribute: %s' % problem.msg)

            seen = {}
            for i in fl:
                revisions += 1
                n = fl.node(i)
                lr = self.checkentry(fl, i, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self._err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                if n in state['skipread']:
                    continue

                # check renames
                try:
                    # This requires resolving fulltext (at least on revlogs). We
                    # may want ``verifyintegrity()`` to pass a set of nodes with
                    # rename metadata as an optimization.
                    rp = fl.renamed(n)
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            if not any(rp[0] in pctx for pctx in ctx.parents()):
                                self._warn(_("warning: copy source of '%s' not"
                                             " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self._err(lr,
                                      _("empty or missing copy source revlog "
                                        "%s:%s") % (rp[0],
                                                    short(rp[1])),
                                      f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises (and is reported below) if the copy
                            # source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self._exc(lr, _("checking rename of %s") % short(n),
                              inst, f)

            # cross-check
            if f in filenodes:
                fns = [(v, k) for k, v in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self._err(lr, _("manifest refers to unknown revision %s") %
                              short(node), f)
        progress.complete()

        if self.warnorphanstorefiles:
            for f in sorted(storefiles):
                self._warn(_("warning: orphan data file '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now