##// END OF EJS Templates
verify: show progress while verifying dirlogs...
Martin von Zweigbergk -
r28205:53f42c8d default
parent child Browse files
Show More
@@ -1,422 +1,435 b''
1 1 # verify.py - repository integrity checking for Mercurial
2 2 #
3 3 # Copyright 2006, 2007 Matt Mackall <mpm@selenic.com>
4 4 #
5 5 # This software may be used and distributed according to the terms of the
6 6 # GNU General Public License version 2 or any later version.
7 7
8 8 from __future__ import absolute_import
9 9
10 10 import os
11 11
12 12 from .i18n import _
13 13 from .node import (
14 14 nullid,
15 15 short,
16 16 )
17 17
18 18 from . import (
19 19 error,
20 20 revlog,
21 21 util,
22 22 )
23 23
def verify(repo):
    """Run a full integrity check of ``repo`` while holding its lock.

    The result is whatever ``verifier.verify()`` returns.
    """
    with repo.lock():
        checker = verifier(repo)
        return checker.verify()
27 27
def _normpath(f):
    """Return ``f`` with every run of repeated slashes collapsed to one."""
    # under hg < 2.4, convert didn't sanitize paths properly, so a
    # converted repo may contain repeated slashes
    collapsed = f
    while True:
        if '//' not in collapsed:
            return collapsed
        collapsed = collapsed.replace('//', '/')
34 34
def _validpath(repo, path):
    """Tell whether ``path`` should be treated as belonging to ``repo``.

    This implementation accepts every path and ignores both arguments:
    in core there is no way for a path to appear in history without
    being relevant.  The hook matters for narrow clones, where many
    filelogs are missing and changelog entries may mention files that
    lie outside the narrow scope; such paths should yield False.
    """
    return True
45 45
class verifier(object):
    """Integrity checker for a single repository.

    Walks the changelog, the manifest(s) and the filelogs, reporting
    every inconsistency through ``ui.warn``.  Problems are tallied in
    ``errors`` / ``warnings`` and the changelog revisions implicated by
    an error are collected in ``badrevs``.
    """

    def __init__(self, repo):
        """Capture ``repo`` (unfiltered) and reset all counters."""
        self.repo = repo.unfiltered()
        self.ui = repo.ui
        # changelog revisions that at least one error was attributed to
        self.badrevs = set()
        self.errors = 0
        self.warnings = 0
        self.havecl = len(repo.changelog) > 0
        self.havemf = len(repo.manifest) > 0
        self.revlogv1 = repo.changelog.version != revlog.REVLOGV0
        self.lrugetctx = util.lrucachefunc(repo.changectx)
        # set once any changeset names a non-null manifest (or cannot be
        # read); gates the manifest revlog sanity check
        self.refersmf = False
        # set when a filelog's revlog file is missing from the store listing
        self.fncachewarned = False

    def warn(self, msg):
        """Emit ``msg`` as a warning line and count it."""
        self.ui.warn(msg + "\n")
        self.warnings += 1

    def err(self, linkrev, msg, filename=None):
        """Report an integrity error and count it.

        ``linkrev`` is the changelog revision the problem is attributed
        to; it is remembered in ``badrevs``.  When it is None the message
        shows '?' instead.  ``filename``, if given, prefixes the message.
        """
        if linkrev is not None:
            self.badrevs.add(linkrev)
        else:
            linkrev = '?'
        msg = "%s: %s" % (linkrev, msg)
        if filename:
            msg = "%s@%s" % (filename, msg)
        self.ui.warn(" " + msg + "\n")
        self.errors += 1

    def exc(self, linkrev, msg, inst, filename=None):
        """Report the exception ``inst`` as an integrity error.

        Exceptions whose ``str()`` is empty are shown via ``repr()`` so
        the message is never blank.
        """
        if not str(inst):
            inst = repr(inst)
        self.err(linkrev, "%s: %s" % (msg, inst), filename)

    def checklog(self, obj, name, linkrev):
        """Sanity-check the revlog ``obj`` (labelled ``name``) as a whole.

        Reports an empty log when the repository has changesets or
        manifests, size mismatches returned by ``obj.checksize()``, and a
        revlog format that disagrees with the changelog's.
        """
        if not len(obj) and (self.havecl or self.havemf):
            self.err(linkrev, _("empty or missing %s") % name)
            return

        d = obj.checksize()
        if d[0]:
            self.err(None, _("data length off by %d bytes") % d[0], name)
        if d[1]:
            self.err(None, _("index contains %d extra bytes") % d[1], name)

        if obj.version != revlog.REVLOGV0:
            if not self.revlogv1:
                self.warn(_("warning: `%s' uses revlog format 1") % name)
        elif self.revlogv1:
            self.warn(_("warning: `%s' uses revlog format 0") % name)

    def checkentry(self, obj, i, node, seen, linkrevs, f):
        """Check revision ``i`` (``node``) of revlog ``obj``.

        Verifies that the linkrev is one of the expected ``linkrevs``,
        that both parents were already seen (or are null), and that the
        node is not a duplicate.  ``seen`` maps node -> revision and is
        updated in place.  ``f`` is the label used in messages.

        Returns the linkrev, or None when it cannot be trusted.
        """
        lr = obj.linkrev(obj.rev(node))
        if lr < 0 or (self.havecl and lr not in linkrevs):
            if lr < 0 or lr >= len(self.repo.changelog):
                msg = _("rev %d points to nonexistent changeset %d")
            else:
                msg = _("rev %d points to unexpected changeset %d")
            self.err(None, msg % (i, lr), f)
            if linkrevs:
                if f and len(linkrevs) > 1:
                    try:
                        # attempt to filter down to real linkrevs
                        linkrevs = [l for l in linkrevs
                                    if self.lrugetctx(l)[f].filenode() == node]
                    except Exception:
                        # best effort only: keep the unfiltered candidates
                        pass
                self.warn(_(" (expected %s)") % " ".join(map(str, linkrevs)))
            lr = None # can't be trusted

        try:
            p1, p2 = obj.parents(node)
            if p1 not in seen and p1 != nullid:
                self.err(lr, _("unknown parent 1 %s of %s") %
                         (short(p1), short(node)), f)
            if p2 not in seen and p2 != nullid:
                self.err(lr, _("unknown parent 2 %s of %s") %
                         (short(p2), short(node)), f)
        except Exception as inst:
            self.exc(lr, _("checking parents of %s") % short(node), inst, f)

        if node in seen:
            self.err(lr, _("duplicate revision %d (%d)") % (i, seen[node]), f)
        seen[node] = i
        return lr

    def verify(self):
        """Run every verification phase and print a summary.

        Raises ``error.Abort`` when the repository URL is not a
        ``file:`` URL.  Returns 1 when integrity errors were found and
        None otherwise.
        """
        repo = self.repo

        ui = repo.ui

        if not repo.url().startswith('file:'):
            raise error.Abort(_("cannot verify bundle or remote repos"))

        if os.path.exists(repo.sjoin("journal")):
            ui.warn(_("abandoned transaction found - run hg recover\n"))

        if ui.verbose or not self.revlogv1:
            ui.status(_("repository uses revlog format %d\n") %
                      (1 if self.revlogv1 else 0))

        mflinkrevs, filelinkrevs = self._verifychangelog()

        filenodes = self._verifymanifest(mflinkrevs)
        del mflinkrevs

        self._crosscheckfiles(filelinkrevs, filenodes)

        totalfiles, filerevisions = self._verifyfiles(filenodes, filelinkrevs)

        ui.status(_("%d files, %d changesets, %d total revisions\n") %
                  (totalfiles, len(repo.changelog), filerevisions))
        if self.warnings:
            ui.warn(_("%d warnings encountered!\n") % self.warnings)
        if self.fncachewarned:
            ui.warn(_('hint: run "hg debugrebuildfncache" to recover from '
                      'corrupt fncache\n'))
        if self.errors:
            ui.warn(_("%d integrity errors encountered!\n") % self.errors)
            if self.badrevs:
                ui.warn(_("(first damaged changeset appears to be %d)\n")
                        % min(self.badrevs))
            return 1

    def _verifychangelog(self):
        """Check every changeset and index what it refers to.

        Returns ``(mflinkrevs, filelinkrevs)``: the first maps a manifest
        node to the changelog revisions naming it, the second maps a
        normalized file path to the changelog revisions touching it.
        """
        ui = self.ui
        repo = self.repo
        cl = repo.changelog

        ui.status(_("checking changesets\n"))
        mflinkrevs = {}
        filelinkrevs = {}
        seen = {}
        self.checklog(cl, "changelog", 0)
        total = len(repo)
        for i in repo:
            ui.progress(_('checking'), i, total=total, unit=_('changesets'))
            n = cl.node(i)
            self.checkentry(cl, i, n, seen, [i], "changelog")

            try:
                changes = cl.read(n)
                # changes[0] is used as the manifest node and changes[3]
                # as the list of touched files
                if changes[0] != nullid:
                    mflinkrevs.setdefault(changes[0], []).append(i)
                    self.refersmf = True
                for f in changes[3]:
                    if _validpath(repo, f):
                        filelinkrevs.setdefault(_normpath(f), []).append(i)
            except Exception as inst:
                # an unreadable changeset may have referred to a manifest
                self.refersmf = True
                self.exc(i, _("unpacking changeset %s") % short(n), inst)
        ui.progress(_('checking'), None)
        return mflinkrevs, filelinkrevs

    def _verifymanifest(self, mflinkrevs, dir="", storefiles=None,
                        progress=None):
        """Check the manifest log for ``dir`` and, recursively, its subdirs.

        ``mflinkrevs`` maps each expected manifest node to the revisions
        that refer to it; matched entries are deleted from it.  The root
        call (``dir == ""``) needs no other argument.  Recursive calls
        receive ``storefiles`` (set of 'meta/' store files not yet claimed
        by a directory log, pruned in place) and ``progress`` (a callable
        invoked once per directory).

        Returns a dict mapping file path to ``{filenode: linkrev}``.
        """
        repo = self.repo
        ui = self.ui
        mf = self.repo.manifest.dirlog(dir)

        if not dir:
            self.ui.status(_("checking manifests\n"))

        filenodes = {}
        subdirnodes = {}
        seen = {}
        label = "manifest"
        if dir:
            label = dir
            revlogfiles = mf.files()
            storefiles.difference_update(revlogfiles)
            if progress: # should be true since we're in a subdirectory
                progress()
        if self.refersmf:
            # Do not check manifest if there are only changelog entries with
            # null manifests.
            self.checklog(mf, label, 0)
        total = len(mf)
        for i in mf:
            if not dir:
                ui.progress(_('checking'), i, total=total, unit=_('manifests'))
            n = mf.node(i)
            lr = self.checkentry(mf, i, n, seen, mflinkrevs.get(n, []), label)
            if n in mflinkrevs:
                del mflinkrevs[n]
            elif dir:
                self.err(lr, _("%s not in parent-directory manifest") %
                         short(n), label)
            else:
                self.err(lr, _("%s not in changesets") % short(n), label)

            try:
                for f, fn, fl in mf.readshallowdelta(n).iterentries():
                    if not f:
                        self.err(lr, _("entry without name in manifest"))
                    elif f == "/dev/null": # ignore this in very old repos
                        continue
                    fullpath = dir + _normpath(f)
                    if not _validpath(repo, fullpath):
                        continue
                    if fl == 't':
                        # flag 't' entries are collected as subdirectory
                        # manifests, keyed with a trailing slash
                        subdirnodes.setdefault(fullpath + '/', {}).setdefault(
                            fn, []).append(lr)
                    else:
                        filenodes.setdefault(fullpath, {}).setdefault(fn, lr)
            except Exception as inst:
                self.exc(lr, _("reading delta %s") % short(n), inst, label)
        if not dir:
            ui.progress(_('checking'), None)

        if self.havemf:
            # whatever is left in mflinkrevs was referenced but never found
            for c, m in sorted([(c, m) for m in mflinkrevs
                                for c in mflinkrevs[m]]):
                if dir:
                    self.err(c, _("parent-directory manifest refers to unknown "
                                  "revision %s") % short(m), label)
                else:
                    self.err(c, _("changeset refers to unknown revision %s") %
                             short(m), label)

        if not dir and subdirnodes:
            self.ui.status(_("checking directory manifests\n"))
            storefiles = set()
            subdirs = set()
            revlogv1 = self.revlogv1
            for f, f2, size in repo.store.datafiles():
                if not f:
                    self.err(None, _("cannot decode filename '%s'") % f2)
                elif (size > 0 or not revlogv1) and f.startswith('meta/'):
                    storefiles.add(_normpath(f))
                    subdirs.add(os.path.dirname(f))
            subdircount = len(subdirs)
            # one-element list so the closure can mutate the counter
            # (no 'nonlocal' in Python 2)
            currentsubdir = [0]
            def progress():
                currentsubdir[0] += 1
                ui.progress(_('checking'), currentsubdir[0], total=subdircount,
                            unit=_('manifests'))

        for subdir, linkrevs in subdirnodes.iteritems():
            subdirfilenodes = self._verifymanifest(linkrevs, subdir, storefiles,
                                                   progress)
            for f, onefilenodes in subdirfilenodes.iteritems():
                filenodes.setdefault(f, {}).update(onefilenodes)

        if not dir and subdirnodes:
            ui.progress(_('checking'), None)
            # store files no directory log claimed
            for f in sorted(storefiles):
                self.warn(_("warning: orphan revlog '%s'") % f)

        return filenodes

    def _crosscheckfiles(self, filelinkrevs, filenodes):
        """Report files known to only one of changelog and manifests.

        ``filelinkrevs`` comes from the changelog pass, ``filenodes``
        from the manifest pass.
        """
        repo = self.repo
        ui = self.ui
        ui.status(_("crosschecking files in changesets and manifests\n"))

        total = len(filelinkrevs) + len(filenodes)
        count = 0
        if self.havemf:
            for f in sorted(filelinkrevs):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filenodes:
                    lr = filelinkrevs[f][0]
                    self.err(lr, _("in changeset but not in manifest"), f)

        if self.havecl:
            for f in sorted(filenodes):
                count += 1
                ui.progress(_('crosschecking'), count, total=total)
                if f not in filelinkrevs:
                    try:
                        fl = repo.file(f)
                        lr = min([fl.linkrev(fl.rev(n)) for n in filenodes[f]])
                    except Exception:
                        # filelog unusable: report without a revision
                        lr = None
                    self.err(lr, _("in manifest but not in changeset"), f)

        ui.progress(_('crosschecking'), None)

    def _verifyfiles(self, filenodes, filelinkrevs):
        """Check every filelog against changelog and manifest data.

        ``filenodes`` maps path -> ``{filenode: linkrev}`` and is consumed
        in place: nodes found in a filelog are deleted, leftovers are
        reported.  ``filelinkrevs`` maps path -> changelog revisions.

        Returns ``(number of files, number of file revisions)``.
        """
        repo = self.repo
        ui = self.ui
        lrugetctx = self.lrugetctx
        revlogv1 = self.revlogv1
        havemf = self.havemf
        ui.status(_("checking files\n"))

        storefiles = set()
        for f, f2, size in repo.store.datafiles():
            if not f:
                self.err(None, _("cannot decode filename '%s'") % f2)
            elif (size > 0 or not revlogv1) and f.startswith('data/'):
                storefiles.add(_normpath(f))

        files = sorted(set(filenodes) | set(filelinkrevs))
        total = len(files)
        revisions = 0
        for i, f in enumerate(files):
            ui.progress(_('checking'), i, item=f, total=total)
            try:
                linkrevs = filelinkrevs[f]
            except KeyError:
                # in manifest but not in changelog
                linkrevs = []

            if linkrevs:
                lr = linkrevs[0]
            else:
                lr = None

            try:
                fl = repo.file(f)
            except error.RevlogError as e:
                self.err(lr, _("broken revlog! (%s)") % e, f)
                continue

            for ff in fl.files():
                try:
                    storefiles.remove(ff)
                except KeyError:
                    self.warn(_(" warning: revlog '%s' not in fncache!") % ff)
                    self.fncachewarned = True

            self.checklog(fl, f, lr)
            seen = {}
            for rev in fl:
                # Reset for every revision: if reading this revision fails
                # below, the rename check must not reuse the copy
                # information of the previous revision.
                rp = None
                revisions += 1
                n = fl.node(rev)
                lr = self.checkentry(fl, rev, n, seen, linkrevs, f)
                if f in filenodes:
                    if havemf and n not in filenodes[f]:
                        self.err(lr, _("%s not in manifests") % (short(n)), f)
                    else:
                        del filenodes[f][n]

                # verify contents
                try:
                    l = len(fl.read(n))
                    rp = fl.renamed(n)
                    if l != fl.size(rev):
                        if len(fl.revision(n)) != fl.size(rev):
                            self.err(lr, _("unpacked size is %s, %s expected") %
                                     (l, fl.size(rev)), f)
                except error.CensoredNodeError:
                    # experimental config: censor.policy
                    if ui.config("censor", "policy", "abort") == "abort":
                        self.err(lr, _("censored file data"), f)
                except Exception as inst:
                    self.exc(lr, _("unpacking %s") % short(n), inst, f)

                # check renames
                try:
                    if rp:
                        if lr is not None and ui.verbose:
                            ctx = lrugetctx(lr)
                            found = False
                            for pctx in ctx.parents():
                                if rp[0] in pctx:
                                    found = True
                                    break
                            if not found:
                                self.warn(_("warning: copy source of '%s' not"
                                            " in parents of %s") % (f, ctx))
                        fl2 = repo.file(rp[0])
                        if not len(fl2):
                            self.err(lr, _("empty or missing copy source "
                                           "revlog %s:%s")
                                     % (rp[0], short(rp[1])), f)
                        elif rp[1] == nullid:
                            ui.note(_("warning: %s@%s: copy source"
                                      " revision is nullid %s:%s\n")
                                    % (f, lr, rp[0], short(rp[1])))
                        else:
                            # raises if the copy source revision is unknown
                            fl2.rev(rp[1])
                except Exception as inst:
                    self.exc(lr, _("checking rename of %s") % short(n), inst, f)

            # cross-check
            if f in filenodes:
                fns = [(lr, n) for n, lr in filenodes[f].iteritems()]
                for lr, node in sorted(fns):
                    self.err(lr, _("manifest refers to unknown revision %s") %
                             short(node), f)
        ui.progress(_('checking'), None)

        for f in sorted(storefiles):
            self.warn(_("warning: orphan revlog '%s'") % f)

        return len(files), revisions
General Comments 0
You need to be logged in to leave comments. Login now